<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sitemap (sitemaps.org protocol 0.9) for manishklach.github.io.
  One <url> entry per line: the writings index (writings.html, priority 1.0)
  comes first, followed by the individual articles under /writings/
  (priority 0.8 each).
  Every entry currently carries lastmod 2026-04-19 and changefreq weekly.
  NOTE(review): an identical lastmod on all entries looks like a build or
  publish stamp rather than a per-page modification date; confirm with
  whatever produces this file.
-->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <url><loc>https://manishklach.github.io/writings.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>1.0</priority></url>
  <url><loc>https://manishklach.github.io/writings/sparse-mqa-fused-moe-hyperconnections-frontier-models.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/the-whole-stack-everything-between-a-transistor-and-a-token.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/hbm-how-it-is-actually-built.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/inference-speed-is-a-memory-problem.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/inside-the-gpu-sm-warps-tensor-cores.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/kv-fabrics-treating-context-as-a-distributed-filesystem.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/the-pcie-tax-why-bypassing-the-host-doubles-moe-throughput.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/what-3nm-actually-means.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/agent-topology-problem.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/agentic-ai-cpu-dram-problem.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/dpu-agent-memory-controller.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/risc-cisc.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/true-cost-of-a-token.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/floating-point-in-ai.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/pcie-bottleneck-cxl-escape.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/power-contract-slo.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/ai-power-stack-grid-to-gpu.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/blackwell-gb300-nvl72-cooling-stack.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/high-bandwidth-flash-hbf-missing-tier-ai-inference.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/inference-batch-geometry-memory-cost.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/kv-hierarchy-lab-regret-aware-eviction-trace-driven-policy-evaluation.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/multi-tenant-inference-memory-fairness.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/nand-flash-invisible-backbone-ai-clusters.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/recompute-vs-transfer-frontier-inference.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/storage-geometry-100k-gpu-cluster-nand-demand.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/the-semiconductor-ecosystem.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/tpu-less-hbm-design-choice-not-limitation.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/tpu-optical-circuit-switching-3d-torus.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/tpu-software-managed-memory-real-advantage.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/vera-rubin-cooling-45c-supply-temperature.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/address-space-mmap-quiet-limits-modern-systems.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/cpo-making-memory-disaggregation-schedulable.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/from-ssd-to-gpu-to-sram-last-bottleneck-on-chip.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/hbm-fragmentation-guard.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/hbm-throttling-safe-kv-admission-reusenet-llm-inference.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/local-agent-deduplication-middleware.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/mcos-hfc-hardware-fabric-controller-memory-centric-ai-systems.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/mcos-memory-centric-operating-system-ai-systems.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/mcos-must-live-in-hardware-ai-memory-fabrics.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/rdma-in-the-age-of-ai.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/seam-orchestrator-workload-aware-kv-routing.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/thermal-debt-memory-problem-hot-dies-throttle-kv-prefetch.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/vllm-internals-cut-batch-size-gpu-melting.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/why-disk-rdma-gpu-is-still-fragmented-today.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/why-hbm-thermal-throttling-is-silent.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/attention-sink-problem-transformer-inference-memory-waste.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/photonics-stack-ai-networking-part-1.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/techdemoforge-local-first-engine-turning-technical-docs-into-demo-videos.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/thermal-debt-ai-clusters.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/inp-vs-silicon-photonics-vs-vcsel-ai-networking.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/memory-orchestrated-sparse-serving-next-frontier-in-long-context-inference.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/prefill-decode-disaggregation-why-the-next-big-inference-architecture-splits-the-job-in-two.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/scale-out-was-yesterday-scale-up-optics-is-the-next-battle.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/speculative-decoding-is-a-memory-problem.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/the-real-ai-bottleneck-is-moving-from-compute-to-interconnect-power-density.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/cpo-lpo-dsp-vcsel-what-actually-matters-for-ai-infrastructure.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/photonics-becoming-the-operating-system-of-ai-clusters.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/the-memory-scheduler-is-the-new-critical-path-in-ai-inference.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/why-cache-coherency-is-the-wrong-default-for-ai-machines.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/how-to-measure-gpu-underutilization-on-nvidia-h100-and-h200.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/memory-intent-ir-why-ai-compilers-must-emit-memory-plans.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/when-vram-stops-being-a-weight-warehouse.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/adaptive-compiler-runtime-power-contract-energy-optimal-edge-inference.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/ai-cluster-failure-seams.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/beyond-fault-tolerant-parallelism-ai-cluster-reliability.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/bounce-buffers-hidden-tax-ai-systems.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/deterministic-memory-orchestrated-inference-dma-bounded-on-chip-buffers.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/hardware-enforced-on-chip-memory-residency.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/introducing-chromelens-systems-grade-web-performance-telemetry.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/long-context-inference-needs-better-memory-policy.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/mhc-atlas-os-explainable-structure-guided-prioritization.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/mobile-agent-control-vendor-neutral-control-plane-terminal-native-coding-agents.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/predictive-weight-orchestration-runtime-control-for-multi-tier-weight-residency.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/schwab-portfolio-tools-local-practical-portfolio-infrastructure.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/sla-constrained-energy-aware-inference-scheduling-arm-edge-systems.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/srmic-x1-rethinking-memory-hierarchy-llm-decode.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/the-real-tax-in-ai-systems-is-moving-bytes.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/vorchestrate-controller-centric-memory-policy.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/what-bigger-l2-actually-buys-you.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
  <url><loc>https://manishklach.github.io/writings/why-ai-needs-a-new-memory-hierarchy-not-just-bigger-caches.html</loc><lastmod>2026-04-19</lastmod><changefreq>weekly</changefreq><priority>0.8</priority></url>
</urlset>
