diff --git a/proposal/dreamer-basic-arch.png b/proposal/dreamer-basic-arch.png
new file mode 100644
index 0000000..f053420
Binary files /dev/null and b/proposal/dreamer-basic-arch.png differ
diff --git a/proposal/index.html b/proposal/index.html
index 14b2786..b15f051 100644
--- a/proposal/index.html
+++ b/proposal/index.html
@@ -189,6 +189,54 @@
             </section>
         );
 
+        const Motivation = () => ( // Static, prop-less section: architecture figure with caption, then side-by-side "The Problem" / "The V-JEPA Solution" cards.
+            <section className="py-20 px-6 max-w-6xl mx-auto">
+                <div className="max-w-5xl mx-auto">
+                    <h2 className="text-3xl font-bold mb-8 text-center">The Bottleneck in Standard Dreamer</h2>
+
+                    <div className="glass-panel rounded-2xl p-8 md:p-12 mb-8">
+                        <img
+                            src="dreamer-basic-arch.png" // relative URL; the PNG lives beside index.html in proposal/
+                            alt="Dreamer Architecture Comparison"
+                            className="w-full rounded-lg mb-6"
+                        />
+                        <div className="text-center text-xs text-slate-500 mb-8">
+                            Figure: (a) World Model Learning phase requires training encoder/decoder for reconstruction,
+                            (b) Actor-Critic Learning can only begin after world model converges
+                        </div>
+                    </div>
+
+                    <div className="grid md:grid-cols-2 gap-6">
+                        <div className="bg-rose-900/20 border border-rose-500/30 rounded-xl p-6">
+                            <h3 className="text-lg font-bold text-rose-400 mb-3 flex items-center gap-2">
+                                <span className="text-2xl">⚠️</span> The Problem
+                            </h3>
+                            <p className="text-slate-300 text-sm leading-relaxed">
+                                Dreamer-V3 must first train its <strong>Encoder</strong> and <strong>Decoder</strong> networks
+                                to accurately reconstruct pixel-level observations. This reconstruction objective delays the
+                                actual <strong>Actor-Critic training</strong>, requiring millions of environment steps before
+                                the world model produces useful latent representations. The decoder alone adds substantial
+                                computational overhead while modeling irrelevant visual details.
+                            </p>
+                        </div>
+
+                        <div className="bg-cyan-900/20 border border-cyan-500/30 rounded-xl p-6">
+                            <h3 className="text-lg font-bold text-cyan-400 mb-3 flex items-center gap-2">
+                                <span className="text-2xl">✨</span> The V-JEPA Solution
+                            </h3>
+                            <p className="text-slate-300 text-sm leading-relaxed">
+                                By replacing the trainable encoder with a <strong>frozen V-JEPA backbone</strong>, we eliminate
+                                the need for pixel reconstruction entirely. This dramatically reduces trainable parameters
+                                (no encoder training, no decoder needed), saving compute while potentially <strong>increasing
+                                generalization</strong> due to V-JEPA's pretraining on millions of diverse videos. The agent
+                                can immediately leverage "adult-level" visual understanding.
+                            </p>
+                        </div>
+                    </div>
+                </div>
+            </section>
+        );
+
         const ArchitectureViewer = () => {
             const [mode, setMode] = useState('jepa'); // 'standard' or 'jepa'
 
@@ -345,22 +393,55 @@
 
         const Challenges = () => (
             <section id="challenges" className="py-24 px-6 max-w-5xl mx-auto">
-                <div className="bg-gradient-to-br from-amber-900/20 to-slate-800/50 border border-amber-500/20 rounded-2xl p-8 md:p-12">
-                    <div className="flex items-start gap-6">
-                        <div className="bg-amber-500/10 p-3 rounded-full hidden md:block">
-                            <AlertTriangle className="text-amber-500" size={32} />
-                        </div>
-                        <div>
-                            <h2 className="text-2xl font-bold mb-4 text-amber-500">Critical Challenge: The "Red Light" Problem</h2>
-                            <p className="text-slate-300 mb-6 leading-relaxed">
-                                A significant risk of using a completely frozen encoder is the potential filtering of tiny, task-relevant details. A small red light might be statistically insignificant in general internet video data (V-JEPA's training set) but critical for a specific RL task (e.g., a braking signal).
-                            </p>
-                            
-                            <div className="bg-slate-900/80 p-6 rounded-xl border-l-4 border-cyan-500">
-                                <h3 className="text-lg font-bold text-white mb-2">Proposed Solution: Trainable Adapters</h3>
-                                <p className="text-slate-400 text-sm leading-relaxed">
-                                    To mitigate this, we insert lightweight <strong>Trainable Adapters</strong> (Low-Rank Adaptation or similar) into the JEPA backbone. This allows the RL signal to tune attention toward task-specific features without destroying the pretrained general knowledge, maintaining the "adult-level" visual processing while allowing for task specialization.
+                <h2 className="text-3xl font-bold mb-8 text-center">Critical Challenges & Risks</h2>
+
+                <div className="space-y-6">
+                    {/* Challenge 1: Red Light Problem */}
+                    <div className="bg-gradient-to-br from-amber-900/20 to-slate-800/50 border border-amber-500/20 rounded-2xl p-8 md:p-12">
+                        <div className="flex items-start gap-6">
+                            <div className="bg-amber-500/10 p-3 rounded-full hidden md:block">
+                                <AlertTriangle className="text-amber-500" size={32} />
+                            </div>
+                            <div>
+                                <h3 className="text-2xl font-bold mb-4 text-amber-500">Challenge 1: The "Red Light" Problem</h3>
+                                <p className="text-slate-300 mb-6 leading-relaxed">
+                                    A significant risk of using a completely frozen encoder is the potential filtering of tiny, task-relevant details. A small red light might be statistically insignificant in general internet video data (V-JEPA's training set) but critical for a specific RL task (e.g., a braking signal).
                                 </p>
+
+                                <div className="bg-slate-900/80 p-6 rounded-xl border-l-4 border-cyan-500">
+                                    <h4 className="text-lg font-bold text-white mb-2">Proposed Solution: Trainable Adapters</h4>
+                                    <p className="text-slate-400 text-sm leading-relaxed">
+                                        To mitigate this, we insert lightweight <strong>Trainable Adapters</strong> (Low-Rank Adaptation or similar) into the JEPA backbone. This allows the RL signal to tune attention toward task-specific features without destroying the pretrained general knowledge, maintaining the "adult-level" visual processing while allowing for task specialization.
+                                    </p>
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+
+                    {/* Challenge 2: Validation Problem */}
+                    <div className="bg-gradient-to-br from-red-900/20 to-slate-800/50 border border-red-500/20 rounded-2xl p-8 md:p-12">
+                        <div className="flex items-start gap-6">
+                            <div className="bg-red-500/10 p-3 rounded-full hidden md:block">
+                                <Eye className="text-red-500" size={32} />
+                            </div>
+                            <div>
+                                <h3 className="text-2xl font-bold mb-4 text-red-400">Challenge 2: The Validation Problem</h3>
+                                <p className="text-slate-300 mb-6 leading-relaxed">
+                                    Without a decoder to reconstruct pixel representations, it becomes significantly harder to validate that the hidden state <em>actually</em> represents the world state accurately. In standard Dreamer, poor reconstruction quality serves as a clear diagnostic signal that something is wrong with the latent representations. Removing this feedback loop makes debugging and verification more challenging.
+                                </p>
+
+                                <div className="bg-slate-900/80 p-6 rounded-xl border-l-4 border-purple-500">
+                                    <h4 className="text-lg font-bold text-white mb-2">Proposed Solution: Alternative Validation Methods</h4>
+                                    <p className="text-slate-400 text-sm leading-relaxed mb-3">
+                                        We propose using <strong>proxy validation metrics</strong> to ensure representation quality:
+                                    </p>
+                                    <ul className="text-slate-400 text-sm space-y-2 list-disc list-inside">
+                                        <li><strong>Latent prediction accuracy:</strong> Measure how well future latent states are predicted in V-JEPA space</li>
+                                        <li><strong>Downstream task performance:</strong> Monitor RL reward signals and convergence speed as indirect validation</li>
+                                        <li><strong>Probing classifiers:</strong> Train lightweight probes to predict known world properties (object positions, states) from latents</li>
+                                        <li><strong>Optional sparse decoding:</strong> Periodically reconstruct a small batch of frames for qualitative inspection</li>
+                                    </ul>
+                                </div>
                             </div>
                         </div>
                     </div>
@@ -389,6 +470,7 @@
                 <Nav />
                 <Hero />
                 <Abstract />
+                <Motivation />
                 <ArchitectureViewer />
                 <Features />
                 <Challenges />