@prefix :       <https://thinkingmachines.ai/blog/interaction-models/#> .
@prefix rdf:    <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs:   <http://www.w3.org/2000/01/rdf-schema#> .
@prefix owl:    <http://www.w3.org/2002/07/owl#> .
@prefix xsd:    <http://www.w3.org/2001/XMLSchema#> .
@prefix skos:   <http://www.w3.org/2004/02/skos/core#> .
@prefix schema: <http://schema.org/> .
@prefix dct:    <http://purl.org/dc/terms/> .
@prefix foaf:   <http://xmlns.com/foaf/0.1/> .
@prefix prov:   <http://www.w3.org/ns/prov#> .

: a owl:Ontology ;
    schema:name "Interaction Models — KG Ontology"@en ;
    schema:description "Semantic model of Thinking Machines Lab's article introducing interaction models — AI models that handle interaction natively via micro-turn design rather than external scaffolding. Covers the multi-stream architecture, encoder-free early fusion, background model split, and novel interactivity benchmarks."@en ;
    rdfs:label "Interaction Models — Knowledge Graph" ;
    schema:identifier "https://thinkingmachines.ai/blog/interaction-models/" ;
    dct:source <https://thinkingmachines.ai/blog/interaction-models/> ;
    dct:date "2026-05-11"^^xsd:date ;
    prov:wasGeneratedBy :kgGeneratorSkill .

:ArchitectureConcept a owl:Class ; rdfs:label "Architecture Concept" ; rdfs:comment "A component or design pattern in the interaction model architecture." ; rdfs:isDefinedBy : .
:KeyPrinciple a owl:Class ; rdfs:label "Key Principle" ; rdfs:comment "A foundational insight driving the interaction model approach to human-AI collaboration." ; rdfs:isDefinedBy : .
:BenchmarkResult a owl:Class ; rdfs:label "Benchmark Result" ; rdfs:comment "A quantitative performance metric from the interaction model evaluation." ; rdfs:isDefinedBy : .

:article a schema:Article, schema:TechArticle ;
    rdfs:label "Interaction Models: A Scalable Approach to Human-AI Collaboration" ;
    schema:name "Interaction Models: A Scalable Approach to Human-AI Collaboration"@en ;
    schema:headline "Interaction Models: A Scalable Approach to Human-AI Collaboration"@en ;
    schema:description "Thinking Machines Lab introduces interaction models — AI models that handle interactivity natively through micro-turn design (200ms chunks), encoder-free early fusion, and a split architecture separating real-time interaction from background reasoning. A 276B MoE model (12B active) achieves state-of-the-art on streaming benchmarks while being the first to demonstrate meaningful performance on novel interactivity dimensions: time awareness, visual proactivity, and simultaneous speech."@en ;
    schema:url <https://thinkingmachines.ai/blog/interaction-models/> ;
    schema:datePublished "2026-05-11"^^xsd:date ;
    schema:publisher :thinkingMachinesLab ;
    schema:about :interactionModels, :humanAIcollaboration, :microTurnDesign, :streamingAI ;
    schema:hasPart :bottleneckSection, :architectureSection, :benchmarksSection, :faqPage, :glossarySet, :howtoSection, :architectureGroup, :principlesGroup, :benchmarksGroup .

:thinkingMachinesLab a schema:Organization ;
    schema:name "Thinking Machines Lab"@en ;
    schema:url <https://thinkingmachines.ai> .

:interactionModels a skos:Concept, schema:DefinedTerm ;
    schema:name "Interaction Models"@en ;
    schema:description "AI models that handle interaction natively rather than through external scaffolding — using micro-turn design (200ms chunks), multi-stream input/output, and a split architecture separating real-time presence from sustained reasoning. The core thesis: interactivity should scale alongside intelligence."@en ; rdfs:isDefinedBy : .

:humanAIcollaboration a skos:Concept, schema:DefinedTerm ;
    schema:name "Human-AI Collaboration"@en ;
    schema:description "The bottleneck in current AI interfaces — turn-based models force humans to adapt to AI constraints. Interaction models aim to keep humans in the loop by enabling continuous, real-time exchange where humans can interject, clarify, and course-correct during generation."@en ; rdfs:isDefinedBy : .

:microTurnDesign a skos:Concept, schema:DefinedTerm ;
    schema:name "Micro-Turn Design"@en ;
    schema:description "A time-aligned architecture processing 200ms chunks of input and output simultaneously — enabling continuous exchange rather than rigid turn-based conversation. Based on Clark and Brennan's grounding in communication theory."@en ; rdfs:isDefinedBy : .

:streamingAI a skos:Concept, schema:DefinedTerm ;
    schema:name "Streaming AI"@en ;
    schema:description "Real-time AI interaction across audio, video, and text modalities — models that perceive continuously rather than waiting for complete user input. Enables verbal interjections, simultaneous speech, and proactive visual awareness."@en ; rdfs:isDefinedBy : .

:architectureGroup a schema:ArticleSection ; schema:name "Architecture"@en ; schema:position 3 ; schema:hasPart :microTurn, :encoderFree, :interactionBackground, :moEdesign, :sglangStreaming .
:microTurn a :ArchitectureConcept ; rdfs:label "Micro-Turn Processing" ; rdfs:comment "200ms time-aligned chunks of input and output processed simultaneously — enabling continuous exchange rather than rigid turn-taking. The core innovation enabling natural collaboration."@en .
:encoderFree a :ArchitectureConcept ; rdfs:label "Encoder-Free Early Fusion" ; rdfs:comment "Minimal preprocessing — audio via dMel with lightweight embedding, images as 40x40 patches encoded by hMLP, audio decoder using a flow head. All co-trained from scratch for maximal coherence across modalities."@en .
:interactionBackground a :ArchitectureConcept ; rdfs:label "Interaction + Background Model Split" ; rdfs:comment "Real-time interaction model handles presence and responsiveness; an asynchronous background model handles sustained reasoning, tool use, and search. Results stream back for natural interleaving in conversation."@en .
:moEdesign a :ArchitectureConcept ; rdfs:label "276B MoE (12B Active)" ; rdfs:comment "Mixture of Experts architecture — 276B total parameters with 12B active per forward pass. Gather+gemv MoE kernels for efficient inference. Bitwise deterministic training via NVLS-based all-reduce."@en .
:sglangStreaming a :ArchitectureConcept ; rdfs:label "SGLang Streaming Sessions" ; rdfs:comment "Inference optimization — streaming sessions in SGLang (upstreamed patch), split-KV attention for consistent determinism, under 5% overhead vs. standard kernels."@en .

:principlesGroup a schema:ArticleSection ; schema:name "Key Principles"@en ; schema:position 2 ; schema:hasPart :interactivityScales, :humansInTheLoop, :nativeNotScaffolding .
:interactivityScales a :KeyPrinciple ; rdfs:label "Interactivity Should Scale with Intelligence" ; rdfs:comment "The core thesis — as models become more intelligent, the interface must become more interactive. Turn-based chat is a bottleneck that pushes humans out of workflows not because the work doesn't need them, but because the interface has no room for them."@en .
:humansInTheLoop a :KeyPrinciple ; rdfs:label "Keep Humans in the Collaborative Loop" ; rdfs:comment "Autonomous agents are valuable but don't serve collaborative workflows. Humans need to stay in the loop — clarifying, course-correcting, and giving feedback in real time. Interaction models are designed for collaboration, not delegation."@en .
:nativeNotScaffolding a :KeyPrinciple ; rdfs:label "Native Interaction, Not External Scaffolding" ; rdfs:comment "Interaction capability shouldn't be bolted on externally. The model itself must handle interaction natively — processing multi-stream input and output simultaneously — rather than relying on external orchestration layers."@en .

:benchmarksGroup a schema:ArticleSection ; schema:name "Benchmarks"@en ; schema:position 4 ; schema:hasPart :turnTakingLatency, :fdbV15, :fdbV3, :timeSpeak, :cueSpeak, :proactiveVideo .
:turnTakingLatency a :BenchmarkResult ; rdfs:label "FD-bench V1 Turn-Taking" ; rdfs:comment "0.40s latency vs. 1.18s for GPT-2.0 minimal — interaction models dramatically reduce the gap between human speech and AI response."@en .
:fdbV15 a :BenchmarkResult ; rdfs:label "FD-bench V1.5 Average" ; rdfs:comment "77.8 vs. 54.3 for Gemini minimal — leading interaction quality across streaming benchmarks."@en .
:fdbV3 a :BenchmarkResult ; rdfs:label "FD-bench V3 Response Quality" ; rdfs:comment "82.8% / 68.0% Pass@1 vs. 81.0% / 58.0% for GPT-2.0 xhigh — combining intelligence with interactivity."@en .
:timeSpeak a :BenchmarkResult ; rdfs:label "TimeSpeak (Novel)" ; rdfs:comment "64.7 vs. 4.3 for GPT-2.0 minimal — measuring time awareness in conversation. No existing model can meaningfully perform this task."@en .
:cueSpeak a :BenchmarkResult ; rdfs:label "CueSpeak (Novel)" ; rdfs:comment "81.7 vs. 2.9 for GPT-2.0 minimal — measuring ability to respond to verbal and visual cues during speech."@en .
:proactiveVideo a :BenchmarkResult ; rdfs:label "ProactiveVideoQA (Novel)" ; rdfs:comment "33.5 PAUC vs. 25.0 no-response baseline — measuring proactive visual awareness and interjection during video stream."@en .
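# The FAQ below (:faq8) also reports RepCount-A and Charades results that are not
# otherwise modeled in this graph; the two nodes below mirror those figures. The
# numbers are copied verbatim from the FAQ answer text, which does not name the
# comparison model for either metric.
:repCountA a :BenchmarkResult ; rdfs:label "RepCount-A" ; rdfs:comment "35.4 vs. 1.3 — measuring counting of repeated events in an audio stream."@en .
:charades a :BenchmarkResult ; rdfs:label "Charades" ; rdfs:comment "32.4 vs. 0 — measuring visual grounding during a streaming video."@en .
:benchmarksGroup schema:hasPart :repCountA, :charades .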

:bottleneckSection a schema:ArticleSection ; schema:name "The Collaboration Bottleneck"@en ; schema:position 1 ; schema:description "Current turn-based interfaces force humans to adapt to AI constraints. Models wait with no perception until input is complete. Autonomous agents are valuable but don't serve collaborative workflows — humans need to stay in the loop."@en .
:architectureSection a schema:ArticleSection ; schema:name "System Architecture"@en ; schema:position 3 ; schema:description "A multi-stream, micro-turn design processing 200ms chunks simultaneously. Encoder-free early fusion minimizes preprocessing. Real-time interaction model + async background model for sustained reasoning and tool use."@en .
:benchmarksSection a schema:ArticleSection ; schema:name "Benchmarks"@en ; schema:position 4 ; schema:description "TML-Interaction-Small leads on streaming benchmarks and is the first model to achieve meaningful scores on novel interactivity dimensions — time awareness, visual proactivity, and simultaneous speech."@en .

:faqPage a schema:FAQPage ; schema:name "Frequently Asked Questions"@en ; schema:mainEntity :faq1, :faq2, :faq3, :faq4, :faq5, :faq6, :faq7, :faq8, :faq9, :faq10, :faq11, :faq12 .
:faq1 a schema:Question ; schema:name "What are interaction models?"@en ; schema:acceptedAnswer :a1 . :a1 a schema:Answer ; schema:text "AI models that handle interaction natively through micro-turn design rather than external scaffolding. They process 200ms chunks of input and output simultaneously across audio, video, and text — enabling continuous human-AI collaboration instead of rigid turn-based chat."@en .
:faq2 a schema:Question ; schema:name "What problem do interaction models solve?"@en ; schema:acceptedAnswer :a2 . :a2 a schema:Answer ; schema:text "The collaboration bottleneck — turn-based interfaces push humans out of workflows not because their input isn't needed, but because the interface has no room for them. Interaction models keep humans in the loop by enabling real-time interjections, clarifications, and course corrections."@en .
:faq3 a schema:Question ; schema:name "What is micro-turn design?"@en ; schema:acceptedAnswer :a3 . :a3 a schema:Answer ; schema:text "A time-aligned architecture processing 200ms chunks of input and output simultaneously. Instead of waiting for a complete user turn before responding, the model perceives and generates continuously — enabling simultaneous speech, verbal interjections, and real-time responsiveness."@en .
:faq4 a schema:Question ; schema:name "What is encoder-free early fusion?"@en ; schema:acceptedAnswer :a4 . :a4 a schema:Answer ; schema:text "Minimal preprocessing across modalities — audio via dMel with lightweight embedding, images as 40x40 patches encoded by hMLP, audio decoder using a flow head. All components are co-trained from scratch for maximal coherence."@en .
:faq5 a schema:Question ; schema:name "How does the interaction + background model split work?"@en ; schema:acceptedAnswer :a5 . :a5 a schema:Answer ; schema:text "The real-time interaction model handles presence and responsiveness. An asynchronous background model handles sustained reasoning, tool use, and search — streaming results back for natural interleaving in conversation. Both share context."@en .
:faq6 a schema:Question ; schema:name "What model size and architecture is used?"@en ; schema:acceptedAnswer :a6 . :a6 a schema:Answer ; schema:text "TML-Interaction-Small is a 276B parameter Mixture of Experts model with 12B active parameters per forward pass. Uses gather+gemv MoE kernels, SGLang streaming sessions, and bitwise deterministic training kernels."@en .
:faq7 a schema:Question ; schema:name "How does it compare to competitors on streaming benchmarks?"@en ; schema:acceptedAnswer :a7 . :a7 a schema:Answer ; schema:text "0.40s turn-taking latency (vs. 1.18s GPT-2.0 minimal). 77.8 FD-bench V1.5 average (vs. 54.3 Gemini minimal). 82.8% FD-bench V3 Response Quality (vs. 81.0% GPT-2.0 xhigh). Leads or is competitive across all streaming metrics."@en .
:faq8 a schema:Question ; schema:name "What novel interactivity benchmarks were created?"@en ; schema:acceptedAnswer :a8 . :a8 a schema:Answer ; schema:text "TimeSpeak (time awareness: 64.7 vs. 4.3 GPT-2.0), CueSpeak (verbal/visual cues: 81.7 vs. 2.9 GPT-2.0), RepCount-A (audio counting: 35.4 vs. 1.3), ProactiveVideoQA (visual proactivity: 33.5 vs. 25.0 baseline), Charades (visual grounding: 32.4 vs. 0). No existing model can meaningfully perform any of these tasks."@en .
:faq9 a schema:Question ; schema:name "What are the current limitations?"@en ; schema:acceptedAnswer :a9 . :a9 a schema:Answer ; schema:text "Long-session context management, dependence on compute and deployment reliability, safety research that is still needed (modality-appropriate refusals, long-horizon robustness), larger pretrained models that are currently too slow to serve, and background agent integration that can still be improved."@en .
:faq10 a schema:Question ; schema:name "How does safety work for interaction models?"@en ; schema:acceptedAnswer :a10 . :a10 a schema:Answer ; schema:text "Two axes: modality-appropriate refusals (using TTS-generated refusal data for colloquial speech) and long-horizon robustness (multi-turn refusal data via automated red-teaming). 99.0% Harmbench refusal rate."@en .
:faq11 a schema:Question ; schema:name "What is the philosophical grounding?"@en ; schema:acceptedAnswer :a11 . :a11 a schema:Answer ; schema:text "Drawn from Clark and Brennan's grounding in communication theory, Ong's orality and literacy, Scott's métis reasoning, and Hayek's knowledge in society. The design is grounded in decades of research on how humans collaborate and communicate."@en .
:faq12 a schema:Question ; schema:name "When will interaction models be available?"@en ; schema:acceptedAnswer :a12 . :a12 a schema:Answer ; schema:text "A limited research preview in the coming months, with a wider release later in 2026. Thinking Machines Lab is also offering research grants for developing interactivity evaluation frameworks."@en .

:glossarySet a schema:DefinedTermSet ; schema:name "Glossary of Key Terms"@en ; schema:hasDefinedTerm :interactionModels, :humanAIcollaboration, :microTurnDesign, :streamingAI, :moETerm, :sglangTerm, :dMelTerm .
:moETerm a schema:DefinedTerm ; schema:name "Mixture of Experts (MoE)"@en ; schema:description "Architecture where only a subset of model parameters are active per forward pass — 276B total, 12B active — enabling large models at lower inference cost."@en .
:sglangTerm a schema:DefinedTerm ; schema:name "SGLang"@en ; schema:description "Inference framework supporting streaming sessions, split-KV attention, and efficient MoE kernel strategies."@en .
:dMelTerm a schema:DefinedTerm ; schema:name "dMel"@en ; schema:description "Mel-spectrogram-based audio representation fed to the model through a lightweight embedding — part of the encoder-free early fusion design, requiring no separate audio encoder."@en .
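# hMLP is referenced in the architecture notes and FAQ above but has no glossary
# entry; the term below is added for completeness. Its description repeats only
# what this graph already states (40x40 patch embedding within encoder-free early
# fusion); the expansion of the acronym is not given in the source.
:hMLPTerm a schema:DefinedTerm ; schema:name "hMLP"@en ; schema:description "Lightweight patch-embedding module used for images in encoder-free early fusion — encodes images as 40x40 patches and is co-trained with the rest of the model."@en .
:glossarySet schema:hasDefinedTerm :hMLPTerm .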

:howtoSection a schema:HowTo ; schema:name "How to Build Interaction Models"@en ; schema:description "A seven-step guide derived from Thinking Machines Lab's approach to building native interaction models."@en ; schema:step :step1, :step2, :step3, :step4, :step5, :step6, :step7 .
:step1 a schema:HowToStep ; schema:name "Start with Micro-Turn Design"@en ; schema:position 1 ; schema:text "Process audio, video, and text in 200ms chunks for simultaneous input/output. Design the model to perceive continuously rather than waiting for complete user turns. Ground the design in communication theory."@en .
:step2 a schema:HowToStep ; schema:name "Use Encoder-Free Early Fusion"@en ; schema:position 2 ; schema:text "Minimize preprocessing — lightweight embeddings (dMel for audio, hMLP patch embedding for images) co-trained from scratch. Avoid heavy external encoders that add latency and reduce cross-modal coherence."@en .
:step3 a schema:HowToStep ; schema:name "Split Interaction from Background Reasoning"@en ; schema:position 3 ; schema:text "Separate real-time interaction (presence, responsiveness) from sustained computation (reasoning, tool use, search). Stream background results back for natural interleaving in conversation."@en .
:step4 a schema:HowToStep ; schema:name "Optimize with MoE and Streaming Inference"@en ; schema:position 4 ; schema:text "Use Mixture of Experts with low active parameter count (12B active from 276B). Implement gather+gemv kernels, SGLang streaming sessions, and bitwise deterministic training for stability."@en .
:step5 a schema:HowToStep ; schema:name "Build Novel Interactivity Benchmarks"@en ; schema:position 5 ; schema:text "Standard benchmarks don't measure interactivity. Create tests for time awareness, verbal/visual cue following, simultaneous speech, and proactive visual interjection. Existing models score near zero on these dimensions."@en .
:step6 a schema:HowToStep ; schema:name "Design Safety for Continuous Interaction"@en ; schema:position 6 ; schema:text "Safety in turn-based models doesn't transfer. Build modality-appropriate refusals (TTS-generated for colloquial speech) and long-horizon robustness via multi-turn automated red-teaming."@en .
:step7 a schema:HowToStep ; schema:name "Release Incrementally with Research Grants"@en ; schema:position 7 ; schema:text "Start with limited research previews. Fund external evaluation framework development. The interactivity measurement ecosystem needs to grow alongside the models."@en .

:kgGeneratorSkill a schema:SoftwareApplication ; schema:name "kg-generator skill"@en ; schema:url <https://github.com/anomalyco/opencode/tree/main/kg-generator> ; schema:description "LLM-prompt-based Knowledge Graph generation skill."@en .
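
# Non-normative usage sketch (illustrative only, not part of the ontology): a
# SPARQL query that lists every benchmark result in this graph with its label
# and comment. Prefixes match the @prefix declarations above.
#
#   PREFIX :     <https://thinkingmachines.ai/blog/interaction-models/#>
#   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
#
#   SELECT ?result ?label ?comment WHERE {
#     ?result a :BenchmarkResult ;
#             rdfs:label   ?label ;
#             rdfs:comment ?comment .
#   }
#   ORDER BY ?label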
