{"id":884,"date":"2025-08-01T01:56:00","date_gmt":"2025-08-01T01:56:00","guid":{"rendered":"https:\/\/ouyangminwei.com\/?p=884"},"modified":"2025-07-18T09:57:26","modified_gmt":"2025-07-18T09:57:26","slug":"the-evolution-of-reinforcement-learning","status":"publish","type":"post","link":"https:\/\/ouyangminwei.com\/index.php\/2025\/08\/01\/the-evolution-of-reinforcement-learning\/","title":{"rendered":"The Evolution of Reinforcement Learning"},"content":{"rendered":"\n<!DOCTYPE html>\n<html lang=\"zh-Hant\" class=\"scroll-smooth\">\n<head>\n    <meta charset=\"UTF-8\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n    <title>\u5f37\u5316\u5b78\u7fd2\u6f14\u9032\u4e4b\u8def<\/title>\n    <script src=\"https:\/\/cdn.tailwindcss.com\"><\/script>\n    <script src=\"https:\/\/cdn.jsdelivr.net\/npm\/chart.js\"><\/script>\n    <link rel=\"preconnect\" href=\"https:\/\/fonts.googleapis.com\">\n    <link rel=\"preconnect\" href=\"https:\/\/fonts.gstatic.com\" crossorigin>\n    <link href=\"https:\/\/fonts.googleapis.com\/css2?family=Noto+Sans+TC:wght@400;500;700&#038;display=swap\" rel=\"stylesheet\">\n    <!-- Chosen Palette: Academic Calm -->\n    <!-- Application Structure Plan: The application is designed as a single, vertical scrolling journey, mapping the chronological and conceptual evolution of Reinforcement Learning. This linear, story-telling structure is chosen for its intuitive nature, guiding users from foundational concepts to advanced topics seamlessly. A sticky sidebar navigation allows for quick access to key milestones, while an interactive \"Evolution Map\" at the top provides a visual overview and an alternative navigation method. This structure breaks down a complex academic topic into digestible, interconnected sections, prioritizing user understanding and a clear learning path over a dense, unstructured report format. 
-->\n    <!-- Visualization & Content Choices: \n        - Report Info: Overall RL Evolution -> Goal: Provide a high-level map and navigation -> Viz\/Method: Interactive timeline\/flowchart made with HTML\/CSS divs and JS for interactivity -> Justification: Visually anchors the user's journey and clarifies the relationships between different algorithms without using SVG\/Mermaid.\n        - Report Info: Algorithm Comparison (DQN, PPO, etc.) -> Goal: Compare qualitative attributes -> Viz\/Method: Chart.js Radar Chart -> Justification: Effectively visualizes multi-dimensional trade-offs (e.g., stability, sample efficiency) in a single, interactive graphic.\n        - Report Info: Conceptual architectures (Actor-Critic, GAN) -> Goal: Explain component relationships -> Viz\/Method: Styled HTML divs in a flexbox layout -> Justification: Creates simple, clear, and accessible diagrams for abstract concepts without external libraries or complex graphics.\n        - Report Info: Detailed algorithm explanations -> Goal: Provide in-depth knowledge on demand -> Viz\/Method: Interactive accordion\/collapsible sections (HTML\/JS) -> Justification: Manages information density, allowing users to explore details at their own pace without being overwhelmed initially.\n    -->\n    <!-- CONFIRMATION: NO SVG graphics used. NO Mermaid JS used. 
-->\n    <style>\n        body {\n            font-family: 'Noto Sans TC', sans-serif;\n            background-color: #f8f9fa;\n            color: #343a40;\n        }\n        .timeline-item {\n            position: relative;\n            padding-bottom: 2.5rem; \n            padding-left: 2.5rem;\n        }\n        .timeline-item:not(:last-child)::before {\n            content: '';\n            position: absolute;\n            left: 18px;\n            top: 18px;\n            width: 2px;\n            height: 100%;\n            background-color: #dee2e6;\n        }\n        .timeline-dot {\n            position: absolute;\n            left: 0;\n            top: 0;\n            display: flex;\n            align-items: center;\n            justify-content: center;\n            width: 40px;\n            height: 40px;\n            border-radius: 50%;\n            background-color: #4a5568;\n            color: white;\n            border: 4px solid #f8f9fa;\n            z-index: 10;\n        }\n        .chart-container {\n            position: relative;\n            width: 100%;\n            max-width: 600px;\n            margin-left: auto;\n            margin-right: auto;\n            height: 350px;\n            max-height: 450px;\n        }\n        @media (min-width: 768px) {\n            .chart-container {\n                height: 450px;\n            }\n        }\n        .content-card {\n            background-color: white;\n            border-radius: 0.75rem;\n            border: 1px solid #e2e8f0;\n            transition: all 0.3s ease-in-out;\n            box-shadow: 0 4px 6px -1px rgb(0 0 0 \/ 0.1), 0 2px 4px -2px rgb(0 0 0 \/ 0.1);\n        }\n        .content-card:hover {\n            transform: translateY(-5px);\n            box-shadow: 0 10px 15px -3px rgb(0 0 0 \/ 0.1), 0 4px 6px -2px rgb(0 0 0 \/ 0.1);\n        }\n        .details-content {\n            overflow: hidden;\n            transition: max-height 0.5s ease-in-out, opacity 0.5s ease-in-out;\n      
      max-height: 0;\n            opacity: 0;\n        }\n        .details-content.open {\n            max-height: 1000px;\n            opacity: 1;\n        }\n    <\/style>\n<\/head>\n<body class=\"bg-gray-50 text-gray-800\">\n\n    <div class=\"flex\">\n        <aside class=\"sticky top-0 h-screen bg-white shadow-md p-4 w-64 hidden lg:block\">\n            <h2 class=\"text-xl font-bold text-gray-700 mb-6\">\u6f14\u9032\u5c0e\u822a<\/h2>\n            <nav>\n                <ul class=\"space-y-3\">\n                    <li><a href=\"#intro\" class=\"font-semibold text-gray-600 hover:text-blue-600 transition\">\u4ecb\u7d39<\/a><\/li>\n                    <li><a href=\"#part1\" class=\"font-semibold text-gray-600 hover:text-blue-600 transition\">\u7b2c\u4e00\u7ae0\uff1a\u8207\u74b0\u5883\u4e92\u52d5\u5b78\u7fd2<\/a>\n                        <ul class=\"pl-4 mt-2 space-y-2 text-sm\">\n                            <li><a href=\"#dqn\" class=\"text-gray-500 hover:text-blue-500 transition\">Deep Q-Learning (DQN)<\/a><\/li>\n                            <li><a href=\"#ppo\" class=\"text-gray-500 hover:text-blue-500 transition\">Policy Gradients &#038; PPO<\/a><\/li>\n                        <\/ul>\n                    <\/li>\n                    <li><a href=\"#part2\" class=\"font-semibold text-gray-600 hover:text-blue-600 transition\">\u7b2c\u4e8c\u7ae0\uff1a\u5b78\u7fd2\u74b0\u5883\u6a21\u578b<\/a>\n                        <ul class=\"pl-4 mt-2 space-y-2 text-sm\">\n                            <li><a href=\"#world-models\" class=\"text-gray-500 hover:text-blue-500 transition\">World Models<\/a><\/li>\n                        <\/ul>\n                    <\/li>\n                     <li><a href=\"#comparison\" class=\"font-semibold text-gray-600 hover:text-blue-600 transition\">\u6f14\u7b97\u6cd5\u6bd4\u8f03<\/a><\/li>\n                    <li><a href=\"#part3\" class=\"font-semibold text-gray-600 hover:text-blue-600 
transition\">\u7b2c\u4e09\u7ae0\uff1a\u7121\u74b0\u5883\u4e92\u52d5\u5b78\u7fd2<\/a>\n                        <ul class=\"pl-4 mt-2 space-y-2 text-sm\">\n                            <li><a href=\"#imitation\" class=\"text-gray-500 hover:text-blue-500 transition\">\u6a21\u4eff\u5b78\u7fd2(Imitation Learning)<\/a><\/li>\n                             <li><a href=\"#irl\" class=\"text-gray-500 hover:text-blue-500 transition\">\u9006\u5411\u5f37\u5316\u5b78\u7fd2(IRL)<\/a><\/li>\n                        <\/ul>\n                    <\/li>\n                    <li><a href=\"#summary\" class=\"font-semibold text-gray-600 hover:text-blue-600 transition\">\u7e3d\u7d50<\/a><\/li>\n                <\/ul>\n            <\/nav>\n        <\/aside>\n\n        <main class=\"w-full p-4 md:p-10 lg:p-12\">\n            <header id=\"intro\" class=\"text-center mb-16\">\n                <h1 class=\"text-4xl md:text-5xl font-bold text-gray-800 mb-4\">\u5f37\u5316\u5b78\u7fd2(RL)\u6f14\u9032\u4e4b\u8def<\/h1>\n                <p class=\"text-lg text-gray-600 max-w-3xl mx-auto\">\u672c\u5831\u544a\u5c07\u5e36\u60a8\u8d70\u904e\u5f37\u5316\u5b78\u7fd2\u7684\u6f14\u5316\u6b77\u7a0b\uff0c\u5f9e\u7d93\u5178\u7684\u50f9\u503c\u5b78\u7fd2\u5230\u5148\u9032\u7684\u7b56\u7565\u68af\u5ea6\u8207\u4e16\u754c\u6a21\u578b\uff0c\u6700\u7d42\u63a2\u8a0e\u5982\u4f55\u5728\u6c92\u6709\u660e\u78ba\u734e\u52f5\u7684\u74b0\u5883\u4e0b\uff0c\u900f\u904e\u6a21\u4eff\u5c08\u5bb6\u4f86\u9032\u884c\u5b78\u7fd2\u3002<\/p>\n            <\/header>\n            \n            <section id=\"part1\" class=\"mb-16 scroll-mt-20\">\n                <h2 class=\"text-3xl font-bold mb-8 text-gray-700 border-l-4 border-blue-500 pl-4\">\u7b2c\u4e00\u7ae0\uff1a\u8207\u74b0\u5883\u4e92\u52d5\u5b78\u7fd2 (Model-Free)<\/h2>\n                <p class=\"mb-12 
text-gray-600\">\u9019\u662f\u5f37\u5316\u5b78\u7fd2\u6700\u7d93\u5178\u7684\u7bc4\u5f0f\u3002\u667a\u6167\u9ad4(Agent)\u76f4\u63a5\u8207\u74b0\u5883\u4e92\u52d5\uff0c\u900f\u904e\u8a66\u932f(Trial-and-Error)\u4f86\u5b78\u7fd2\u4e00\u500b\u80fd\u6700\u5927\u5316\u7d2f\u7a4d\u734e\u52f5\u7684\u7b56\u7565\uff0c\u800c\u4e0d\u9700\u8981\u53bb\u7406\u89e3\u74b0\u5883\u7684\u5b8c\u6574\u52d5\u614b\u6a21\u578b\u3002\u9019\u500b\u9818\u57df\u4e3b\u8981\u5206\u70ba\u5169\u5927\u6d3e\u5225\uff1a\u57fa\u65bc\u50f9\u503c(Value-based)\u548c\u57fa\u65bc\u7b56\u7565(Policy-based)\u7684\u65b9\u6cd5\u3002<\/p>\n                <div class=\"relative\">\n                    <div class=\"timeline-item\" id=\"dqn\">\n                        <div class=\"timeline-dot\">1<\/div>\n                        <div class=\"content-card p-6\">\n                            <h3 class=\"text-2xl font-semibold mb-3 text-blue-700\">Deep Q-Learning (DQN): \u50f9\u503c\u5b78\u7fd2\u7684\u91cc\u7a0b\u7891<\/h3>\n                            <p class=\"text-gray-600 mb-4\">DQN \u7d50\u5408\u4e86 Q-Learning \u548c\u6df1\u5ea6\u795e\u7d93\u7db2\u7d61\uff0c\u89e3\u6c7a\u4e86\u8655\u7406\u9ad8\u7dad\u5ea6\u8f38\u5165\uff08\u5982\uff1a\u904a\u6232\u756b\u9762\uff09\u7684\u554f\u984c\u3002\u5b83\u5b78\u7fd2\u4e00\u500b &#8220;Q-value&#8221; \u51fd\u6578\uff0c\u7528\u4f86\u8a55\u4f30\u5728\u7279\u5b9a\u72c0\u614b\u4e0b\u63a1\u53d6\u67d0\u500b\u52d5\u4f5c\u7684\u597d\u58de\u3002<\/p>\n                            <button class=\"details-toggle text-blue-600 font-semibold\">\u5c55\u958b\u7d30\u7bc0 &rarr;<\/button>\n                            <div class=\"details-content mt-4 border-t pt-4 text-gray-600\">\n                                <p><strong>\u6838\u5fc3\u601d\u60f3\uff1a<\/strong> \u4f7f\u7528\u795e\u7d93\u7db2\u7d61\u4f86\u8fd1\u4f3c Q-value \u51fd\u6578 $Q(s, a)$\u3002\u8f38\u5165\u662f\u72c0\u614b $s$\uff0c\u8f38\u51fa\u662f\u6bcf\u500b\u53ef\u80fd\u52d5\u4f5c $a$ \u7684 Q-value\u3002<\/p>\n            
                    <p class=\"mt-2\"><strong>\u5169\u5927\u5275\u65b0\uff1a<\/strong><\/p>\n                                <ul class=\"list-disc list-inside mt-2 space-y-1\">\n                                    <li><strong>\u7d93\u9a57\u56de\u653e (Experience Replay):<\/strong> \u5c07\u667a\u6167\u9ad4\u7684\u7d93\u9a57 (state, action, reward, next_state) \u5b58\u5132\u8d77\u4f86\uff0c\u8a13\u7df4\u6642\u96a8\u6a5f\u62bd\u6a23\uff0c\u6253\u7834\u4e86\u6578\u64da\u9593\u7684\u76f8\u95dc\u6027\uff0c\u4f7f\u8a13\u7df4\u66f4\u7a69\u5b9a\u3002<\/li>\n                                    <li><strong>\u76ee\u6a19\u7db2\u7d61 (Target Network):<\/strong> \u4f7f\u7528\u4e00\u500b\u7368\u7acb\u7684\u3001\u66f4\u65b0\u8f03\u6162\u7684\u7db2\u7d61\u4f86\u8a08\u7b97\u76ee\u6a19 Q-value\uff0c\u6e1b\u5c11\u4e86\u81ea\u8209(bootstrapping)\u6642\u7684\u76ee\u6a19\u4e0d\u7a69\u5b9a\u554f\u984c\u3002<\/li>\n                                <\/ul>\n                                <p class=\"mt-2\"><strong>\u512a\u9ede\uff1a<\/strong> \u6a23\u672c\u6548\u7387(Sample efficiency)\u76f8\u5c0d\u8f03\u9ad8\uff0c\u56e0\u70ba\u5b83\u53ef\u4ee5\u91cd\u8907\u5229\u7528\u904e\u53bb\u7684\u7d93\u9a57\u3002<\/p>\n                                <p class=\"mt-2\"><strong>\u7f3a\u9ede\uff1a<\/strong> \u7121\u6cd5\u8655\u7406\u9023\u7e8c\u52d5\u4f5c\u7a7a\u9593\uff0c\u4e14\u5c0d\u65bc\u96a8\u6a5f\u7b56\u7565\u7684\u5b78\u7fd2\u6548\u679c\u4e0d\u4f73\u3002<\/p>\n                            <\/div>\n                        <\/div>\n                    <\/div>\n\n                    <div class=\"timeline-item\" id=\"ppo\">\n                        <div class=\"timeline-dot\">2<\/div>\n                        <div class=\"content-card p-6\">\n                            <h3 class=\"text-2xl font-semibold mb-3 text-green-700\">Policy Gradients &#038; PPO: \u7b56\u7565\u5b78\u7fd2\u7684\u5d1b\u8d77<\/h3>\n                            <p class=\"text-gray-600 
mb-4\">\u8207\u5176\u5b78\u7fd2\u50f9\u503c\uff0c\u7b56\u7565\u68af\u5ea6\u65b9\u6cd5\u76f4\u63a5\u5b78\u7fd2\u4e00\u500b\u7b56\u7565(Policy) &#8211; \u4e00\u500b\u5f9e\u72c0\u614b\u5230\u52d5\u4f5c\u7684\u6620\u5c04\u3002\u5b83\u76f4\u63a5\u512a\u5316\u6211\u5011\u60f3\u8981\u7684\u76ee\u6a19\uff1a\u6700\u5927\u5316\u734e\u52f5\u3002PPO \u662f\u76ee\u524d\u6700\u6d41\u884c\u548c\u7a69\u5065\u7684\u7b56\u7565\u68af\u5ea6\u7b97\u6cd5\u4e4b\u4e00\u3002<\/p>\n                            <button class=\"details-toggle text-green-600 font-semibold\">\u5c55\u958b\u7d30\u7bc0 &rarr;<\/button>\n                            <div class=\"details-content mt-4 border-t pt-4 text-gray-600\">\n                                 <p class=\"mb-4\"><strong>Actor-Critic (A2C\/A3C) \u4f5c\u70ba\u6a4b\u6a11:<\/strong> \u5728\u7d14\u7cb9\u7684\u7b56\u7565\u68af\u5ea6\u548c PPO \u4e4b\u9593\uff0c\u51fa\u73fe\u4e86 Actor-Critic \u65b9\u6cd5\u3002\u5b83\u7d50\u5408\u4e86\u50f9\u503c\u5b78\u7fd2\u548c\u7b56\u7565\u5b78\u7fd2\uff1a<\/p>\n                                <div class=\"flex flex-col md:flex-row gap-4 mb-4\">\n                                    <div class=\"flex-1 p-4 bg-gray-100 rounded-lg text-center\">\n                                        <h4 class=\"font-bold\">Actor (\u6f14\u54e1)<\/h4>\n                                        <p>\u8ca0\u8cac\u5b78\u7fd2\u548c\u57f7\u884c\u7b56\u7565\uff0c\u6c7a\u5b9a\u5728\u7279\u5b9a\u72c0\u614b\u4e0b\u8a72\u505a\u4ec0\u9ebc\u52d5\u4f5c\u3002<\/p>\n                                    <\/div>\n                                    <div class=\"flex-1 p-4 bg-gray-100 rounded-lg text-center\">\n                                        <h4 class=\"font-bold\">Critic (\u8a55\u8ad6\u5bb6)<\/h4>\n                                        <p>\u8ca0\u8cac\u8a55\u4f30 Actor \u7684\u52d5\u4f5c\u597d\u58de\uff0c\u63d0\u4f9b\u50f9\u503c\u5224\u65b7\uff0c\u6307\u5c0e Actor \u7684\u5b78\u7fd2\u65b9\u5411\u3002<\/p>\n                                    
<\/div>\n                                <\/div>\n                                <p><strong>PPO (Proximal Policy Optimization) \u6838\u5fc3\u601d\u60f3\uff1a<\/strong> \u7b56\u7565\u68af\u5ea6\u7684\u4e3b\u8981\u554f\u984c\u662f\u66f4\u65b0\u6b65\u9577\u96e3\u4ee5\u9078\u64c7\uff0c\u592a\u5927\u6703\u5c0e\u81f4\u7b56\u7565\u5d29\u6f70\uff0c\u592a\u5c0f\u5247\u5b78\u7fd2\u592a\u6162\u3002PPO \u900f\u904e\u4e00\u500b &#8220;\u88c1\u526a(Clipping)&#8221; \u6a5f\u5236\u4f86\u9650\u5236\u6bcf\u6b21\u7b56\u7565\u66f4\u65b0\u7684\u5e45\u5ea6\uff0c\u78ba\u4fdd\u5b78\u7fd2\u904e\u7a0b\u7684\u7a69\u5b9a\u6027\u3002<\/p>\n                                <p class=\"mt-2\"><strong>\u512a\u9ede\uff1a<\/strong> \u7a69\u5b9a\u6027\u9ad8\uff0c\u6613\u65bc\u5be6\u73fe\uff0c\u80fd\u8655\u7406\u9023\u7e8c\u548c\u96e2\u6563\u52d5\u4f5c\u7a7a\u9593\u3002<\/p>\n                                <p class=\"mt-2\"><strong>\u7f3a\u9ede\uff1a<\/strong> \u6a23\u672c\u6548\u7387\u901a\u5e38\u4f4e\u65bc DQN \u7b49 off-policy \u65b9\u6cd5\u3002<\/p>\n                            <\/div>\n                        <\/div>\n                    <\/div>\n                <\/div>\n            <\/section>\n            \n            <section id=\"part2\" class=\"mb-16 scroll-mt-20\">\n                <h2 class=\"text-3xl font-bold mb-8 text-gray-700 border-l-4 border-purple-500 pl-4\">\u7b2c\u4e8c\u7ae0\uff1a\u5b78\u7fd2\u74b0\u5883\u6a21\u578b (Model-Based)<\/h2>\n                <p class=\"mb-12 text-gray-600\">\u76f8\u5c0d\u65bc\u76f4\u63a5\u5728\u771f\u5be6\u74b0\u5883\u4e2d\u5b78\u7fd2\u7b56\u7565\uff0cModel-Based RL \u8a66\u5716\u5148\u5b78\u7fd2\u4e00\u500b\u74b0\u5883\u7684 &#8220;\u4e16\u754c\u6a21\u578b(World Model)&#8221;\u3002\u4e00\u65e6\u64c1\u6709\u4e86\u9019\u500b\u6a21\u578b\uff0c\u667a\u6167\u9ad4\u5c31\u53ef\u4ee5\u5728 &#8220;\u8166\u4e2d&#8221; \u6216 &#8220;\u5922\u5883\u4e2d&#8221; 
\u9032\u884c\u6a21\u64ec\u548c\u898f\u5283\uff0c\u6975\u5927\u5730\u63d0\u9ad8\u4e86\u5b78\u7fd2\u6548\u7387\u3002<\/p>\n                 <div class=\"relative\">\n                    <div class=\"timeline-item\" id=\"world-models\">\n                        <div class=\"timeline-dot\">3<\/div>\n                        <div class=\"content-card p-6\">\n                            <h3 class=\"text-2xl font-semibold mb-3 text-purple-700\">World Models: \u5728\u5922\u5883\u4e2d\u5b78\u7fd2<\/h3>\n                            <p class=\"text-gray-600 mb-4\">World Models \u662f\u4e00\u500b\u4ee3\u8868\u6027\u7684 Model-Based \u65b9\u6cd5\u3002\u5b83\u4e0d\u76f4\u63a5\u5f9e\u9ad8\u7dad\u89c0\u5bdf(\u5982\u50cf\u7d20)\u5b78\u7fd2\uff0c\u800c\u662f\u5148\u5c07\u5176\u58d3\u7e2e\u6210\u4e00\u500b\u4f4e\u7dad\u7684\u6f5b\u5728\u8868\u793a(Latent Representation)\u3002<\/p>\n                            <button class=\"details-toggle text-purple-600 font-semibold\">\u5c55\u958b\u7d30\u7bc0 &rarr;<\/button>\n                            <div class=\"details-content mt-4 border-t pt-4 text-gray-600\">\n                                <p><strong>\u4e09\u5927\u6838\u5fc3\u7d44\u4ef6\uff1a<\/strong><\/p>\n                                <div class=\"space-y-4 mt-4\">\n                                    <div class=\"p-4 bg-gray-100 rounded-lg\">\n                                        <h4 class=\"font-bold\">V (Variational Autoencoder &#8211; VAE):<\/h4>\n                                        <p>\u8ca0\u8cac\u5c07\u9ad8\u7dad\u5ea6\u7684\u89c0\u6e2c\uff08\u5982\u904a\u6232\u756b\u9762\uff09\u58d3\u7e2e\u6210\u4e00\u500b\u7dca\u6e4a\u7684\u6f5b\u5728\u5411\u91cf $z$\u3002\u9019\u662f\u5c0d\u4e16\u754c\u7684 &#8220;\u611f\u77e5&#8221;\u3002<\/p>\n                                    <\/div>\n                                    <div class=\"p-4 bg-gray-100 rounded-lg\">\n                                        <h4 class=\"font-bold\">M (MDN-RNN &#8211; Mixture Density Network with 
Recurrent Neural Network):<\/h4>\n                                        <p>\u9019\u662f\u6838\u5fc3\u7684 &#8220;\u4e16\u754c\u6a21\u578b&#8221;\u3002\u5b83\u5b78\u7fd2\u9810\u6e2c\u5728\u7d66\u5b9a\u7576\u524d\u6f5b\u5728\u72c0\u614b\u548c\u52d5\u4f5c\u5f8c\uff0c\u4e0b\u4e00\u500b\u6f5b\u5728\u72c0\u614b\u6703\u662f\u4ec0\u9ebc\u3002\u5b83\u6355\u6349\u4e86\u4e16\u754c\u7684\u6642\u9593\u52d5\u614b\u3002<\/p>\n                                    <\/div>\n                                    <div class=\"p-4 bg-gray-100 rounded-lg\">\n                                        <h4 class=\"font-bold\">C (Controller):<\/h4>\n                                        <p>\u4e00\u500b\u975e\u5e38\u7c21\u55ae\u7684\u7dda\u6027\u6a21\u578b\uff0c\u8ca0\u8cac\u5728 M \u5275\u9020\u7684 &#8220;\u5922\u5883&#8221; \u4e2d\u5b78\u7fd2\u5982\u4f55\u884c\u52d5\u4ee5\u6700\u5927\u5316\u734e\u52f5\u3002\u7531\u65bc\u5b83\u5728\u4f4e\u7dad\u7684\u6f5b\u5728\u7a7a\u9593\u4e2d\u5b78\u7fd2\uff0c\u6240\u4ee5\u6548\u7387\u6975\u9ad8\u3002<\/p>\n                                    <\/div>\n                                <\/div>\n                                <p class=\"mt-4\"><strong>\u512a\u9ede\uff1a<\/strong> \u6975\u9ad8\u7684\u6a23\u672c\u6548\u7387\uff0c\u56e0\u70ba\u5927\u90e8\u5206\u5b78\u7fd2\u90fd\u5728\u6a21\u578b\u5167\u90e8\u9032\u884c\u3002\u80fd\u5920\u89e3\u6c7a\u9700\u8981\u9577\u671f\u898f\u5283\u7684\u4efb\u52d9\u3002<\/p>\n                                <p class=\"mt-2\"><strong>\u7f3a\u9ede\uff1a<\/strong> \u5be6\u73fe\u8907\u96dc\uff0c\u4e14\u6a21\u578b\u53ef\u80fd\u6703\u8207\u771f\u5be6\u4e16\u754c\u5b58\u5728\u504f\u5dee(Model bias)\u3002<\/p>\n                            <\/div>\n                        <\/div>\n                    <\/div>\n                <\/div>\n            <\/section>\n\n            <section id=\"comparison\" class=\"mb-16 scroll-mt-20\">\n                <h2 class=\"text-3xl font-bold mb-8 text-gray-700 border-l-4 border-yellow-500 
pl-4\">\u6f14\u7b97\u6cd5\u6bd4\u8f03<\/h2>\n                <p class=\"mb-8 text-gray-600\">\u4e0d\u540c\u7684\u6f14\u7b97\u6cd5\u5728\u4e0d\u540c\u65b9\u9762\u5404\u6709\u512a\u52a3\u3002\u4e0b\u65b9\u7684\u96f7\u9054\u5716\u6bd4\u8f03\u4e86\u6211\u5011\u8a0e\u8ad6\u904e\u7684\u4e3b\u8981\u65b9\u6cd5\u5728\u5e7e\u500b\u95dc\u9375\u6307\u6a19\u4e0a\u7684\u8868\u73fe\u3002\u60a8\u53ef\u4ee5\u900f\u904e\u4e0b\u65b9\u7684\u6309\u9215\u4f86\u5207\u63db\u986f\u793a\u7684\u6f14\u7b97\u6cd5\uff0c\u4ee5\u4fbf\u66f4\u6e05\u695a\u5730\u9032\u884c\u6bd4\u8f03\u3002<\/p>\n                <div class=\"chart-container\">\n                    <canvas id=\"rl-comparison-chart\"><\/canvas>\n                <\/div>\n                 <div id=\"chart-legend\" class=\"flex justify-center flex-wrap gap-2 mt-4\"><\/div>\n            <\/section>\n\n            <section id=\"part3\" class=\"mb-16 scroll-mt-20\">\n                <h2 class=\"text-3xl font-bold mb-8 text-gray-700 border-l-4 border-red-500 pl-4\">\u7b2c\u4e09\u7ae0\uff1a\u7121\u74b0\u5883\u4e92\u52d5\u5b78\u7fd2 (Imitation Learning)<\/h2>\n                <p class=\"mb-12 text-gray-600\">\u5728\u8a31\u591a\u73fe\u5be6\u5834\u666f\u4e2d\uff08\u5982\u81ea\u52d5\u99d5\u99db\u3001\u6a5f\u5668\u4eba\u624b\u8853\uff09\uff0c\u8b93\u667a\u6167\u9ad4\u81ea\u7531\u63a2\u7d22\u7684\u6210\u672c\u592a\u9ad8\u6216\u592a\u5371\u96aa\u3002\u5728\u9019\u4e9b\u60c5\u6cc1\u4e0b\uff0c\u6211\u5011\u5e0c\u671b\u667a\u6167\u9ad4\u80fd\u5f9e\u4eba\u985e\u5c08\u5bb6\u63d0\u4f9b\u7684\u7bc4\u4f8b\u4e2d\u5b78\u7fd2\u3002\u9019\u500b\u9818\u57df\u88ab\u7a31\u70ba\u6a21\u4eff\u5b78\u7fd2\u3002<\/p>\n                <div class=\"relative\">\n                    <div class=\"timeline-item\" id=\"imitation\">\n                        <div class=\"timeline-dot\">4<\/div>\n                        <div class=\"content-card p-6\">\n                            <h3 class=\"text-2xl font-semibold mb-3 text-red-700\">Behavior Cloning &#038; DAgger: 
\u5f9e\u89c0\u5bdf\u5230\u4e92\u52d5<\/h3>\n                            <p class=\"text-gray-600 mb-4\">\u9019\u662f\u6a21\u4eff\u5b78\u7fd2\u6700\u76f4\u63a5\u7684\u65b9\u6cd5\u3002\u5b83\u5c07\u5b78\u7fd2\u554f\u984c\u7c21\u5316\u70ba\u4e00\u500b\u76e3\u7763\u5b78\u7fd2\u554f\u984c\uff1a\u7d66\u5b9a\u5c08\u5bb6\u5728\u67d0\u72c0\u614b\u4e0b\u7684\u52d5\u4f5c\uff0c\u6a21\u578b\u5b78\u7fd2\u53bb\u9810\u6e2c\u9019\u500b\u52d5\u4f5c\u3002<\/p>\n                            <button class=\"details-toggle text-red-600 font-semibold\">\u5c55\u958b\u7d30\u7bc0 &rarr;<\/button>\n                            <div class=\"details-content mt-4 border-t pt-4 text-gray-600\">\n                                <p><strong>\u884c\u70ba\u514b\u9686 (Behavior Cloning &#8211; BC):<\/strong><\/p>\n                                <ul class=\"list-disc list-inside mt-2 space-y-1\">\n                                    <li><strong>\u65b9\u6cd5\uff1a<\/strong> \u6536\u96c6\u4e00\u7d44\u5c08\u5bb6\u7684 (state, action) \u6578\u64da\u5c0d\uff0c\u7136\u5f8c\u8a13\u7df4\u4e00\u500b\u5206\u985e\u6216\u56de\u6b78\u6a21\u578b\u3002<\/li>\n                                    <li><strong>\u554f\u984c\uff1a<\/strong> \u5171\u8b8a\u7570\u6578\u504f\u79fb (Covariate Shift)\u3002\u5982\u679c\u667a\u6167\u9ad4\u9047\u5230\u4e00\u500b\u5c08\u5bb6\u5f9e\u672a\u898b\u904e\u7684\u72c0\u614b\uff0c\u5b83\u7684\u884c\u70ba\u53ef\u80fd\u662f\u707d\u96e3\u6027\u7684\uff0c\u4e26\u4e14\u9019\u500b\u932f\u8aa4\u6703\u88ab\u7d2f\u7a4d\uff0c\u5c0e\u81f4\u96e2\u5c08\u5bb6\u7684\u8ecc\u8de1\u8d8a\u4f86\u8d8a\u9060\u3002<\/li>\n                                <\/ul>\n                                 <p class=\"mt-4\"><strong>\u6578\u64da\u96c6\u805a\u5408 (DAgger &#8211; Dataset Aggregation):<\/strong><\/p>\n                                <ul class=\"list-disc list-inside mt-2 space-y-1\">\n                                    <li><strong>\u65b9\u6cd5\uff1a<\/strong> 
\u4e00\u7a2e\u4e92\u52d5\u5f0f\u7684\u6539\u9032\u3002\u9996\u5148\u7528\u5c08\u5bb6\u6578\u64da\u8a13\u7df4\u4e00\u500b\u521d\u59cb\u7b56\u7565\u3002\u7136\u5f8c\uff0c\u7528\u9019\u500b\u7b56\u7565\u53bb\u904b\u884c\uff0c\u8a18\u9304\u4e0b\u667a\u6167\u9ad4\u8a2a\u554f\u7684\u72c0\u614b\u3002\u63a5\u8457\uff0c\u8acb\u5c08\u5bb6\u6a19\u8a3b\u5728\u9019\u4e9b\u65b0\u72c0\u614b\u4e0b\u61c9\u8a72\u63a1\u53d6\u7684\u52d5\u4f5c\u3002\u5c07\u9019\u4e9b\u65b0\u6578\u64da\u52a0\u5165\u8a13\u7df4\u96c6\uff0c\u91cd\u8907\u6b64\u904e\u7a0b\u3002<\/li>\n                                    <li><strong>\u512a\u9ede\uff1a<\/strong> \u900f\u904e\u8b93\u5c08\u5bb6 &#8220;\u7cfe\u6b63&#8221; \u667a\u6167\u9ad4\u7684\u932f\u8aa4\uff0c\u6709\u6548\u5730\u7de9\u89e3\u4e86\u5171\u8b8a\u7570\u6578\u504f\u79fb\u554f\u984c\u3002<\/li>\n                                <\/ul>\n                            <\/div>\n                        <\/div>\n                    <\/div>\n                    <div class=\"timeline-item\" id=\"irl\">\n                        <div class=\"timeline-dot\">5<\/div>\n                        <div class=\"content-card p-6\">\n                            <h3 class=\"text-2xl font-semibold mb-3 text-orange-700\">Inverse Reinforcement Learning (IRL): \u63a8\u65b7\u610f\u5716<\/h3>\n                            <p class=\"text-gray-600 mb-4\">IRL \u66f4\u9032\u4e00\u6b65\uff0c\u5b83\u4e0d\u53ea\u662f\u6a21\u4eff\u5c08\u5bb6\u7684\u884c\u70ba\uff0c\u800c\u662f\u8a66\u5716\u5f9e\u5c08\u5bb6\u7684\u884c\u70ba\u4e2d &#8220;\u53cd\u5411\u63a8\u65b7&#8221; \u51fa\u5c08\u5bb6\u80cc\u5f8c\u7684\u734e\u52f5\u51fd\u6578(Reward Function)\u3002\u4e00\u65e6\u5b78\u5230\u4e86\u734e\u52f5\u51fd\u6578\uff0c\u5c31\u53ef\u4ee5\u7528\u4efb\u4f55\u6a19\u6e96\u7684 RL \u7b97\u6cd5\u4f86\u627e\u5230\u6700\u512a\u7b56\u7565\u3002<\/p>\n                            <button class=\"details-toggle text-orange-600 font-semibold\">\u5c55\u958b\u7d30\u7bc0 &rarr;<\/button>\n                          
  <div class=\"details-content mt-4 border-t pt-4 text-gray-600\">\n                                <p><strong>\u6838\u5fc3\u601d\u60f3\uff1a<\/strong> \u5c08\u5bb6\u7684\u884c\u70ba\u4e4b\u6240\u4ee5 &#8220;\u597d&#8221;\uff0c\u662f\u56e0\u70ba\u5b83\u5728\u67d0\u500b\u6211\u5011\u4e0d\u77e5\u9053\u7684\u734e\u52f5\u51fd\u6578\u4e0b\u662f\u6700\u512a\u7684\u3002IRL \u7684\u76ee\u6a19\u5c31\u662f\u627e\u5230\u9019\u500b\u734e\u52f5\u51fd\u6578\u3002<\/p>\n                                <p class=\"mt-4\"><strong>\u8207 GAN \u7684\u806f\u7e6b (Generative Adversarial Imitation Learning &#8211; GAIL):<\/strong><\/p>\n                                <p class=\"mt-2\">GAIL \u5c07 IRL \u554f\u984c\u5de7\u5999\u5730\u8f49\u5316\u70ba\u4e00\u500b\u751f\u6210\u5c0d\u6297\u7db2\u7d61 (GAN) \u554f\u984c\u3002<\/p>\n                                <div class=\"flex flex-col md:flex-row gap-4 my-4\">\n                                    <div class=\"flex-1 p-4 bg-gray-100 rounded-lg text-center\">\n                                        <h4 class=\"font-bold\">\u751f\u6210\u5668 (Generator)<\/h4>\n                                        <p>\u5c0d\u61c9\u65bc RL \u4e2d\u7684\u7b56\u7565 (Policy)\u3002\u5b83\u751f\u6210\u4e00\u7cfb\u5217\u7684 (state, action) \u8ecc\u8de1\uff0c\u8a66\u5716\u8b93\u9019\u4e9b\u8ecc\u8de1\u770b\u8d77\u4f86\u50cf\u662f\u5c08\u5bb6\u751f\u6210\u7684\u3002<\/p>\n                                    <\/div>\n                                    <div class=\"flex-1 p-4 bg-gray-100 rounded-lg text-center\">\n                                        <h4 class=\"font-bold\">\u5224\u5225\u5668 (Discriminator)<\/h4>\n                                        <p>\u5c0d\u61c9\u65bc RL \u4e2d\u7684\u734e\u52f5\u51fd\u6578\/\u50f9\u503c\u51fd\u6578\u3002\u5b83\u5b78\u7fd2\u53bb\u5340\u5206 &#8220;\u751f\u6210\u5668\u751f\u6210\u7684\u8ecc\u8de1&#8221; \u548c 
&#8220;\u771f\u5be6\u5c08\u5bb6\u8ecc\u8de1&#8221;\u3002\u5224\u5225\u5668\u7684\u8f38\u51fa\u53ef\u4ee5\u88ab\u7576\u4f5c\u4e00\u500b\u734e\u52f5\u4fe1\u865f\u4f86\u8a13\u7df4\u751f\u6210\u5668\u3002<\/p>\n                                    <\/div>\n                                <\/div>\n                                <p><strong>\u512a\u9ede\uff1a<\/strong> \u6bd4 BC \u66f4\u5177\u6cdb\u5316\u80fd\u529b\u3002\u5b78\u5230\u7684\u734e\u52f5\u51fd\u6578\u53ef\u80fd\u6bd4\u5c08\u5bb6\u672c\u8eab\u66f4\u512a\u5316\uff0c\u5f9e\u800c\u53ef\u80fd\u5b78\u5230\u8d85\u8d8a\u5c08\u5bb6\u7684\u7b56\u7565\u3002<\/p>\n                                <p class=\"mt-2\"><strong>\u7f3a\u9ede\uff1a<\/strong> \u8a08\u7b97\u6210\u672c\u9ad8\uff0c\u4e14\u901a\u5e38\u9700\u8981\u5927\u91cf\u7684\u5c08\u5bb6\u6578\u64da\u3002<\/p>\n                            <\/div>\n                        <\/div>\n                    <\/div>\n                <\/div>\n            <\/section>\n\n            <section id=\"summary\" class=\"scroll-mt-20\">\n                 <h2 class=\"text-3xl font-bold mb-8 text-gray-700 border-l-4 border-gray-500 pl-4\">\u7e3d\u7d50<\/h2>\n                 <div class=\"bg-white p-8 rounded-lg shadow-md\">\n                    <p class=\"text-gray-600 leading-relaxed\">\u5f37\u5316\u5b78\u7fd2\u7684\u6f14\u9032\u5c55\u73fe\u4e86\u5f9e\u4f9d\u8cf4\u5927\u91cf\u74b0\u5883\u4e92\u52d5\u548c\u660e\u78ba\u734e\u52f5\uff0c\u5230\u8ffd\u6c42\u66f4\u9ad8\u6a23\u672c\u6548\u7387\u548c\u5f9e\u96b1\u542b\u76ee\u6a19\u4e2d\u5b78\u7fd2\u7684\u8da8\u52e2\u3002<\/p>\n                    <ul class=\"list-disc list-inside mt-4 space-y-2 text-gray-700\">\n                        <li><strong>\u5f9e\u50f9\u503c\u5230\u7b56\u7565\uff1a<\/strong> \u5f9e DQN \u7684\u50f9\u503c\u8a55\u4f30\uff0c\u5230 PPO \u7684\u76f4\u63a5\u7b56\u7565\u512a\u5316\uff0c\u6211\u5011\u770b\u5230\u4e86\u5c0d\u66f4\u7a69\u5b9a\u3001\u66f4\u901a\u7528\u5b78\u7fd2\u7b97\u6cd5\u7684\u8ffd\u6c42\u3002<\/li>\n       
                 <li><strong>\u5f9e\u7121\u6a21\u578b\u5230\u6709\u6a21\u578b\uff1a<\/strong> World Models \u4ee3\u8868\u4e86 RL \u7684\u4e00\u500b\u91cd\u8981\u65b9\u5411\u2014\u2014\u900f\u904e\u5b78\u7fd2\u74b0\u5883\u6a21\u578b\u4f86\u9032\u884c\u5167\u90e8\u898f\u5283\uff0c\u9019\u5728\u6a23\u672c\u7a00\u7f3a\u7684\u5834\u666f\u4e0b\u6975\u5177\u6f5b\u529b\u3002<\/li>\n                        <li><strong>\u5f9e\u734e\u52f5\u5230\u6a21\u4eff\uff1a<\/strong> \u7576\u734e\u52f5\u96e3\u4ee5\u5b9a\u7fa9\u6642\uff0c\u6a21\u4eff\u5b78\u7fd2 (BC, DAgger, IRL) \u63d0\u4f9b\u4e86\u4e00\u689d\u5f37\u5927\u7684\u8def\u5f91\uff0c\u4f7f\u667a\u6167\u9ad4\u80fd\u5920\u5f9e\u5c08\u5bb6\u793a\u7bc4\u4e2d\u5b78\u7fd2\u8907\u96dc\u7684\u884c\u70ba\u3002<\/li>\n                    <\/ul>\n                    <p class=\"mt-4 text-gray-600\">\u672a\u4f86\u7684\u7814\u7a76\u53ef\u80fd\u6703\u66f4\u6df1\u5165\u5730\u878d\u5408\u9019\u4e9b\u65b9\u6cd5\uff0c\u4f8b\u5982\u5c07\u4e16\u754c\u6a21\u578b\u8207\u6a21\u4eff\u5b78\u7fd2\u7d50\u5408\uff0c\u6216\u958b\u767c\u51fa\u66f4\u9ad8\u6548\u3001\u66f4\u5b89\u5168\u7684\u63a2\u7d22\u6a5f\u5236\uff0c\u63a8\u52d5 AI \u5728\u66f4\u8907\u96dc\u7684\u73fe\u5be6\u4e16\u754c\u4efb\u52d9\u4e2d\u53d6\u5f97\u7a81\u7834\u3002<\/p>\n                 <\/div>\n            <\/section>\n        <\/main>\n    <\/div>\n\n<script>\ndocument.addEventListener('DOMContentLoaded', function() {\n    const detailsToggles = document.querySelectorAll('.details-toggle');\n    detailsToggles.forEach(toggle => {\n        toggle.addEventListener('click', () => {\n            const content = toggle.nextElementSibling;\n            content.classList.toggle('open');\n            if (content.classList.contains('open')) {\n                toggle.innerHTML = '\u6536\u8d77\u7d30\u7bc0 &larr;';\n            } else {\n                toggle.innerHTML = '\u5c55\u958b\u7d30\u7bc0 &rarr;';\n            }\n        });\n    });\n\n    const data = {\n        labels: 
['\u6a23\u672c\u6548\u7387', '\u7a69\u5b9a\u6027', '\u5be6\u73fe\u8907\u96dc\u5ea6', '\u8a08\u7b97\u6210\u672c', '\u53ef\u64f4\u5c55\u6027'],\n        datasets: [\n            {\n                label: 'DQN (\u50f9\u503c\u5b78\u7fd2)',\n                data: [4, 3, 3, 3, 4],\n                backgroundColor: 'rgba(59, 130, 246, 0.2)',\n                borderColor: 'rgba(59, 130, 246, 1)',\n                pointBackgroundColor: 'rgba(59, 130, 246, 1)',\n                pointBorderColor: '#fff',\n                pointHoverBackgroundColor: '#fff',\n                pointHoverBorderColor: 'rgba(59, 130, 246, 1)',\n                hidden: false,\n            },\n            {\n                label: 'PPO (\u7b56\u7565\u5b78\u7fd2)',\n                data: [2, 5, 2, 4, 5],\n                backgroundColor: 'rgba(16, 185, 129, 0.2)',\n                borderColor: 'rgba(16, 185, 129, 1)',\n                pointBackgroundColor: 'rgba(16, 185, 129, 1)',\n                pointBorderColor: '#fff',\n                pointHoverBackgroundColor: '#fff',\n                pointHoverBorderColor: 'rgba(16, 185, 129, 1)',\n                hidden: false,\n            },\n            {\n                label: 'World Models (\u6a21\u578b\u5b78\u7fd2)',\n                data: [5, 2, 5, 5, 3],\n                backgroundColor: 'rgba(139, 92, 246, 0.2)',\n                borderColor: 'rgba(139, 92, 246, 1)',\n                pointBackgroundColor: 'rgba(139, 92, 246, 1)',\n                pointBorderColor: '#fff',\n                pointHoverBackgroundColor: '#fff',\n                pointHoverBorderColor: 'rgba(139, 92, 246, 1)',\n                hidden: false,\n            },\n             {\n                label: 'GAIL (\u6a21\u4eff\u5b78\u7fd2)',\n                data: [3, 4, 4, 4, 4],\n                backgroundColor: 'rgba(249, 115, 22, 0.2)',\n                borderColor: 'rgba(249, 115, 22, 1)',\n                pointBackgroundColor: 'rgba(249, 115, 22, 1)',\n                
pointBorderColor: '#fff',\n                pointHoverBackgroundColor: '#fff',\n                pointHoverBorderColor: 'rgba(249, 115, 22, 1)',\n                hidden: true,\n            }\n        ]\n    };\n\n    const config = {\n        type: 'radar',\n        data: data,\n        options: {\n            maintainAspectRatio: false,\n            plugins: {\n                legend: {\n                    display: false \n                },\n                tooltip: {\n                    callbacks: {\n                        label: function(context) {\n                            let label = context.dataset.label || '';\n                            if (label) {\n                                label += ': ';\n                            }\n                            if (context.parsed.r !== null) {\n                                let score = context.parsed.r;\n                                let description = '';\n                                if (score <= 1) description = ' (\u5f88\u4f4e)';\n                                else if (score === 2) description = ' (\u4f4e)';\n                                else if (score === 3) description = ' (\u4e2d\u7b49)';\n                                else if (score === 4) description = ' (\u9ad8)';\n                                else if (score >= 5) description = ' (\u5f88\u9ad8)';\n                                label += score + description;\n                            }\n                            return label;\n                        }\n                    }\n                }\n            },\n            scales: {\n                r: {\n                    angleLines: {\n                        color: 'rgba(0, 0, 0, 0.1)'\n                    },\n                    grid: {\n                        color: 'rgba(0, 0, 0, 0.1)'\n                    },\n                    pointLabels: {\n                        font: {\n                            size: 14,\n                            weight: 'bold'\n          
              },\n                         color: '#4b5563'\n                    },\n                    \/\/ Chart.js v3+ reads min and max from the scale itself; inside ticks they are ignored.\n                    min: 0,\n                    max: 5,\n                    ticks: {\n                        backdropColor: 'transparent',\n                        stepSize: 1,\n                        display: false,\n                    }\n                }\n            }\n        }\n    };\n    \n    const ctx = document.getElementById('rl-comparison-chart').getContext('2d');\n    const comparisonChart = new Chart(ctx, config);\n\n    const legendContainer = document.getElementById('chart-legend');\n    comparisonChart.data.datasets.forEach((dataset, index) => {\n        const button = document.createElement('button');\n        button.innerHTML = `\n            <span style=\"background-color:${dataset.borderColor}; width: 1rem; height: 1rem; display: inline-block; border-radius: 50%; margin-right: 0.5rem; vertical-align: middle;\"><\/span>\n            <span>${dataset.label}<\/span>\n        `;\n        button.className = 'px-4 py-2 rounded-full text-sm font-medium transition';\n        button.style.border = `2px solid ${dataset.borderColor}`;\n        \n        const updateButtonStyle = () => {\n             if (comparisonChart.isDatasetVisible(index)) {\n                button.style.backgroundColor = dataset.backgroundColor;\n                button.style.color = '#1f2937';\n            } else {\n                button.style.backgroundColor = 'transparent';\n                button.style.color = '#4b5563';\n            }\n        };\n\n        button.onclick = () => {\n            \/\/ toggleDataVisibility() hides a data index, not a dataset; flip the dataset itself.\n            comparisonChart.setDatasetVisibility(index, !comparisonChart.isDatasetVisible(index));\n            comparisonChart.update();\n            updateButtonStyle();\n        };\n        \n        updateButtonStyle();\n        legendContainer.appendChild(button);\n    });\n});\n<\/script>\n\n<\/body>\n<\/html>\n\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5f37\u5316\u5b78\u7fd2\u6f14\u9032\u4e4b\u8def 
\u6f14\u9032\u5c0e\u822a \u4ecb\u7d39 \u7b2c\u4e00\u7ae0\uff1a\u8207\u74b0\u5883\u4e92\u52d5\u5b78\u7fd2 Deep Q-Lear &hellip; <a href=\"https:\/\/ouyangminwei.com\/index.php\/2025\/08\/01\/the-evolution-of-reinforcement-learning\/\">\u95b1\u8b80\u5168\u6587 <span class=\"meta-nav\">&rarr;<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"om_disable_all_campaigns":false,"_monsterinsights_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"footnotes":""},"categories":[1],"tags":[],"post_format":[],"class_list":["post-884","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"_edit_lock":"1752832651:1","_edit_last":"1","_aioseo_title":"#post_title #separator_sa #site_title","_aioseo_description":"#post_excerpt","_aioseo_keywords":"","_aioseo_og_title":"","_aioseo_og_description":"","_aioseo_og_article_section":"","_aioseo_og_article_tags":"","_aioseo_twitter_title":"","_aioseo_twitter_description":"","_oembed_2544c1d0cb3503ab4c4d558c3b3c8873":"","_oembed_time_2544c1d0cb3503ab4c4d558c3b3c8873":"","_oembed_99481806ecbe6ce4ee46f8588d320993":"","_oembed_db663acf973e82e6d9d80df71945dfb8":"","_oembed_16cdfab488f57db73586f4286af2704f":"","_wp_old_slug":"","_links":{"self":[{"href":"https:\/\/ouyangminwei.com\/index.php\/wp-json\/wp\/v2\/posts\/884","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/ouyangminwei.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/ouyangminwei.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/ouyangminwei.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/ouyangminwei.com\/index.php\/wp-json\/wp\/v2\/comments?post=884"}],"version-history":[{"count":2,"href":"https:\/\/ouyangminwei.com\
/index.php\/wp-json\/wp\/v2\/posts\/884\/revisions"}],"predecessor-version":[{"id":886,"href":"https:\/\/ouyangminwei.com\/index.php\/wp-json\/wp\/v2\/posts\/884\/revisions\/886"}],"wp:attachment":[{"href":"https:\/\/ouyangminwei.com\/index.php\/wp-json\/wp\/v2\/media?parent=884"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/ouyangminwei.com\/index.php\/wp-json\/wp\/v2\/categories?post=884"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/ouyangminwei.com\/index.php\/wp-json\/wp\/v2\/tags?post=884"},{"taxonomy":"post_format","embeddable":true,"href":"https:\/\/ouyangminwei.com\/index.php\/wp-json\/wp\/v2\/post_format?post=884"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}