{"id":488,"date":"2026-02-17T16:37:23","date_gmt":"2026-02-17T07:37:23","guid":{"rendered":"https:\/\/apprhythm.net\/?p=488"},"modified":"2026-02-17T16:37:23","modified_gmt":"2026-02-17T07:37:23","slug":"llm%e3%81%ae%e5%bc%b7%e5%8c%96%e5%ad%a6%e7%bf%92%e3%81%ab%e3%81%a4%e3%81%84%e3%81%a6","status":"publish","type":"post","link":"https:\/\/apprhythm.net\/?p=488","title":{"rendered":"LLM\u306e\u5f37\u5316\u5b66\u7fd2\u306b\u3064\u3044\u3066"},"content":{"rendered":"\n<style>\n\/* ===================================================\n   \u30a2\u30d7\u30ea\u30ba\u30e0 \u30c6\u30c3\u30af\u30d6\u30ed\u30b0 \u2014 \u6295\u7a3f\u30b9\u30bf\u30a4\u30eb\n   \u30af\u30e9\u30b9\u540d\u306f\u3059\u3079\u3066 \"ap-\" \u30d7\u30ec\u30d5\u30a3\u30c3\u30af\u30b9\u3067\u7ba1\u7406\n   =================================================== *\/\n\n\/* \u2500\u2500 \u30ab\u30e9\u30fc\u5909\u6570 \u2500\u2500 *\/\n.ap-post {\n  --ap-navy:    #1a2340;\n  --ap-orange:  #e8500a;\n  --ap-amber:   #f5a623;\n  --ap-dim:     #4a5568;\n  --ap-muted:   #8a93a6;\n  --ap-border:  #e2e6ed;\n  --ap-bg:      #f7f8fa;\n  --ap-white:   #ffffff;\n  --ap-grad:    linear-gradient(135deg, #e8500a 0%, #f5a623 100%);\n  --ap-grad-dk: linear-gradient(135deg, #1a2340 0%, #2d3f6e 100%);\n  font-family: \"Noto Sans JP\", \"Hiragino Kaku Gothic ProN\", \"Yu Gothic\", sans-serif;\n  font-weight: 300;\n  line-height: 1.9;\n  color: var(--ap-dim);\n  max-width: 860px;\n}\n\n\/* \u2500\u2500 \u30a2\u30a4\u30ad\u30e3\u30c3\u30c1\u4e0b\u306e\u30ea\u30fc\u30c9\u6587\u30d6\u30ed\u30c3\u30af \u2500\u2500 *\/\n.ap-lead-block {\n  background: var(--ap-grad-dk);\n  border-radius: 10px;\n  padding: 2.2rem 2.4rem;\n  margin: 2rem 0 2.8rem;\n  position: relative;\n  overflow: hidden;\n}\n.ap-lead-block::after {\n  content: '';\n  position: absolute;\n  top: -40px; right: -40px;\n  width: 200px; height: 200px;\n  background: radial-gradient(ellipse, rgba(232,80,10,0.25) 0%, transparent 65%);\n  pointer-events: 
none;\n}\n.ap-lead-block p {\n  color: rgba(255,255,255,0.8);\n  margin: 0;\n  font-size: 0.97rem;\n  line-height: 1.9;\n  position: relative;\n  z-index: 1;\n}\n.ap-lead-block strong { color: #fff; font-weight: 500; }\n\n\/* \u2500\u2500 \u30bf\u30b0\u884c \u2500\u2500 *\/\n.ap-meta {\n  display: flex;\n  flex-wrap: wrap;\n  gap: 0.6rem;\n  align-items: center;\n  margin-bottom: 1.6rem;\n}\n.ap-tag {\n  display: inline-block;\n  font-size: 0.68rem;\n  font-weight: 700;\n  padding: 3px 12px;\n  border-radius: 20px;\n  letter-spacing: 0.06em;\n  text-transform: uppercase;\n  font-family: \"Space Mono\", \"Courier New\", monospace;\n}\n.ap-tag-ai   { background: rgba(232,80,10,0.1); color: var(--ap-orange); border: 1px solid rgba(232,80,10,0.25); }\n.ap-tag-ml   { background: rgba(26,35,64,0.07); color: var(--ap-navy);   border: 1px solid rgba(26,35,64,0.15); }\n.ap-tag-ref  { background: rgba(16,185,129,0.08); color: #059669;        border: 1px solid rgba(16,185,129,0.25); }\n.ap-date     { font-size: 0.78rem; color: var(--ap-muted); margin-left: auto; }\n\n\/* \u2500\u2500 \u76ee\u6b21 \u2500\u2500 *\/\n.ap-toc {\n  background: var(--ap-white);\n  border: 1px solid var(--ap-border);\n  border-top: 3px solid var(--ap-orange);\n  border-radius: 0 0 10px 10px;\n  padding: 1.4rem 1.8rem;\n  margin: 2rem 0 2.8rem;\n  box-shadow: 0 2px 12px rgba(26,35,64,0.05);\n}\n.ap-toc-title {\n  font-size: 0.72rem;\n  font-weight: 700;\n  color: var(--ap-orange);\n  letter-spacing: 0.1em;\n  text-transform: uppercase;\n  margin: 0 0 0.9rem;\n  font-family: \"Space Mono\", monospace;\n}\n.ap-toc ol {\n  margin: 0;\n  padding-left: 1.3rem;\n}\n.ap-toc li {\n  margin: 0.3rem 0;\n  font-size: 0.87rem;\n  color: var(--ap-dim);\n}\n.ap-toc a {\n  color: var(--ap-navy);\n  text-decoration: none;\n  transition: color 0.2s;\n}\n.ap-toc a:hover { color: var(--ap-orange); text-decoration: underline; }\n.ap-toc li.sub { list-style: none; margin-left: 1rem; font-size: 0.82rem; color: 
var(--ap-muted); }\n.ap-toc li.sub::before { content: \"\u2514 \"; }\n\n\/* \u2500\u2500 \u30bb\u30af\u30b7\u30e7\u30f3\u898b\u51fa\u3057 \u2500\u2500 *\/\n.ap-section-head {\n  display: flex;\n  align-items: center;\n  gap: 0.8rem;\n  margin: 3.2rem 0 1.4rem;\n}\n.ap-section-num {\n  font-family: \"Space Mono\", monospace;\n  font-size: 0.68rem;\n  font-weight: 700;\n  color: var(--ap-orange);\n  background: rgba(232,80,10,0.08);\n  padding: 3px 10px;\n  border-radius: 20px;\n  flex-shrink: 0;\n}\n.ap-section-line {\n  flex: 1;\n  height: 1px;\n  background: var(--ap-border);\n}\n.ap-post h2 {\n  font-size: 1.55rem;\n  font-weight: 700;\n  color: var(--ap-navy);\n  margin: 0 0 1.1rem;\n  line-height: 1.45;\n  padding-left: 0.9rem;\n  border-left: 3px solid var(--ap-orange);\n}\n.ap-post h3 {\n  font-size: 1.05rem;\n  font-weight: 700;\n  color: var(--ap-navy);\n  margin: 1.8rem 0 0.7rem;\n  display: flex;\n  align-items: center;\n  gap: 0.6rem;\n}\n.ap-post h3::before {\n  content: '';\n  display: inline-block;\n  width: 18px;\n  height: 3px;\n  background: var(--ap-grad);\n  border-radius: 2px;\n  flex-shrink: 0;\n}\n.ap-post h4 {\n  font-size: 0.97rem;\n  font-weight: 700;\n  color: var(--ap-navy);\n  margin: 0 0 0.4rem;\n}\n.ap-post p {\n  margin: 0 0 1.3rem;\n  font-size: 0.96rem;\n}\n.ap-post strong { color: var(--ap-navy); font-weight: 500; }\n.ap-post em { color: var(--ap-muted); font-style: italic; }\n\n\/* \u2500\u2500 \u30b3\u30fc\u30eb\u30a2\u30a6\u30c8 \u2500\u2500 *\/\n.ap-callout {\n  background: var(--ap-bg);\n  border: 1px solid var(--ap-border);\n  border-left: 4px solid var(--ap-orange);\n  border-radius: 0 8px 8px 0;\n  padding: 1.3rem 1.5rem;\n  margin: 1.8rem 0;\n}\n.ap-callout-label {\n  display: block;\n  font-family: \"Space Mono\", monospace;\n  font-size: 0.66rem;\n  font-weight: 700;\n  color: var(--ap-orange);\n  letter-spacing: 0.1em;\n  text-transform: uppercase;\n  margin-bottom: 0.5rem;\n}\n.ap-callout p { margin: 0; font-size: 
0.9rem; }\n\n\/* \u2500\u2500 \u30d7\u30ed\u30bb\u30b9\u30d5\u30ed\u30fc \u2500\u2500 *\/\n.ap-process { margin: 2rem 0; }\n.ap-step {\n  display: grid;\n  grid-template-columns: 52px 1fr;\n  gap: 0 1rem;\n  position: relative;\n  padding-bottom: 2rem;\n}\n.ap-step:last-child { padding-bottom: 0; }\n.ap-step:not(:last-child)::before {\n  content: '';\n  position: absolute;\n  left: 25px; top: 52px; bottom: 0;\n  width: 2px;\n  background: linear-gradient(to bottom, var(--ap-orange), var(--ap-border));\n}\n.ap-step-num {\n  width: 52px; height: 52px;\n  border-radius: 50%;\n  background: var(--ap-grad);\n  color: #fff;\n  font-family: \"Space Mono\", monospace;\n  font-size: 0.82rem;\n  font-weight: 700;\n  display: flex; align-items: center; justify-content: center;\n  flex-shrink: 0;\n  position: relative; z-index: 1;\n  box-shadow: 0 4px 14px rgba(232,80,10,0.28);\n}\n.ap-step-body { padding-top: 0.5rem; }\n\n\/* \u2500\u2500 \u30ab\u30fc\u30c9\u30b0\u30ea\u30c3\u30c9 \u2500\u2500 *\/\n.ap-card-grid {\n  display: grid;\n  grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));\n  gap: 1.1rem;\n  margin: 1.8rem 0;\n}\n.ap-card {\n  background: var(--ap-white);\n  border: 1px solid var(--ap-border);\n  border-radius: 10px;\n  padding: 1.4rem;\n  box-shadow: 0 2px 8px rgba(26,35,64,0.05);\n  transition: box-shadow 0.2s, transform 0.2s;\n  position: relative;\n  overflow: hidden;\n}\n.ap-card::before {\n  content: '';\n  position: absolute;\n  top: 0; left: 0; right: 0;\n  height: 3px;\n}\n.ap-card-a::before { background: var(--ap-grad); }\n.ap-card-b::before { background: var(--ap-grad-dk); }\n.ap-card-c::before { background: linear-gradient(135deg,#10b981,#34d399); }\n.ap-card:hover { box-shadow: 0 8px 22px rgba(232,80,10,0.11); transform: translateY(-2px); }\n.ap-card-icon { font-size: 1.6rem; display: block; margin-bottom: 0.7rem; }\n.ap-card h4 { margin: 0 0 0.45rem; }\n.ap-card p  { margin: 0; font-size: 0.82rem; line-height: 1.7; }\n\n\/* \u2500\u2500 
\u30b3\u30fc\u30c9\u30d6\u30ed\u30c3\u30af \u2500\u2500 *\/\n.ap-code-wrap {\n  background: #1a2340;\n  border-radius: 10px;\n  overflow: hidden;\n  margin: 1.8rem 0;\n  box-shadow: 0 4px 18px rgba(26,35,64,0.16);\n}\n.ap-code-head {\n  display: flex;\n  align-items: center;\n  justify-content: space-between;\n  padding: 0.65rem 1.2rem;\n  background: rgba(255,255,255,0.04);\n  border-bottom: 1px solid rgba(255,255,255,0.07);\n}\n.ap-code-lang {\n  font-family: \"Space Mono\", monospace;\n  font-size: 0.64rem;\n  font-weight: 700;\n  color: #ffb38a;\n  letter-spacing: 0.08em;\n  text-transform: uppercase;\n}\n.ap-code-dots { display: flex; gap: 5px; }\n.ap-code-dots span { width: 10px; height: 10px; border-radius: 50%; }\n.ap-code-dots span:nth-child(1) { background: #ff5f57; }\n.ap-code-dots span:nth-child(2) { background: #ffbd2e; }\n.ap-code-dots span:nth-child(3) { background: #28c840; }\n.ap-code-wrap pre {\n  margin: 0;\n  padding: 1.3rem 1.5rem;\n  font-family: \"Space Mono\", \"Courier New\", monospace;\n  font-size: 0.76rem;\n  line-height: 1.8;\n  color: #94a3b8;\n  overflow-x: auto;\n  white-space: pre;\n}\n.ap-kw  { color: #a78bfa; }\n.ap-fn  { color: #67e8f9; }\n.ap-str { color: #6ee7b7; }\n.ap-cm  { color: #475569; font-style: italic; }\n.ap-num { color: #fb923c; }\n\n\/* \u2500\u2500 \u6bd4\u8f03\u30c6\u30fc\u30d6\u30eb \u2500\u2500 *\/\n.ap-table-wrap { overflow-x: auto; margin: 1.8rem 0; border-radius: 10px; box-shadow: 0 2px 12px rgba(26,35,64,0.06); }\n.ap-table {\n  width: 100%;\n  border-collapse: collapse;\n  font-size: 0.86rem;\n  min-width: 560px;\n}\n.ap-table th {\n  background: var(--ap-navy);\n  color: rgba(255,255,255,0.85);\n  padding: 0.85rem 1.1rem;\n  text-align: left;\n  font-family: \"Space Mono\", monospace;\n  font-size: 0.65rem;\n  letter-spacing: 0.06em;\n  text-transform: uppercase;\n  border-right: 1px solid rgba(255,255,255,0.1);\n}\n.ap-table td {\n  padding: 0.8rem 1.1rem;\n  border: 1px solid var(--ap-border);\n  color: 
var(--ap-dim);\n  vertical-align: top;\n}\n.ap-table tr:nth-child(even) td { background: var(--ap-bg); }\n.ap-table td:first-child { color: var(--ap-navy); font-weight: 500; }\n.ap-table tr:hover td { background: rgba(232,80,10,0.04); }\n\n\/* \u2500\u2500 \u30d0\u30c3\u30b8 \u2500\u2500 *\/\n.ap-badge {\n  display: inline-block;\n  font-family: \"Space Mono\", monospace;\n  font-size: 0.62rem;\n  font-weight: 700;\n  padding: 2px 8px;\n  border-radius: 20px;\n  margin-left: 5px;\n}\n.ap-badge-new  { background: rgba(232,80,10,0.1); color: var(--ap-orange); border: 1px solid rgba(232,80,10,0.25); }\n.ap-badge-old  { background: var(--ap-bg); color: var(--ap-muted); border: 1px solid var(--ap-border); }\n\n\/* \u2500\u2500 \u30c1\u30e3\u30ec\u30f3\u30b8\u30ea\u30b9\u30c8 \u2500\u2500 *\/\n.ap-challenges { margin: 1.8rem 0; display: flex; flex-direction: column; gap: 0.9rem; }\n.ap-challenge {\n  background: var(--ap-white);\n  border: 1px solid var(--ap-border);\n  border-radius: 10px;\n  padding: 1.2rem 1.4rem;\n  display: grid;\n  grid-template-columns: 2.2rem 1fr;\n  gap: 0 0.8rem;\n  box-shadow: 0 2px 8px rgba(26,35,64,0.04);\n}\n.ap-challenge-icon { font-size: 1.2rem; padding-top: 2px; }\n.ap-challenge h4 { margin: 0 0 0.3rem; }\n.ap-challenge p  { margin: 0; font-size: 0.85rem; }\n\n\/* \u2500\u2500 \u30b9\u30bf\u30c3\u30c8\u30dc\u30c3\u30af\u30b9 \u2500\u2500 *\/\n.ap-stat-row {\n  display: grid;\n  grid-template-columns: repeat(3, 1fr);\n  gap: 1rem;\n  margin: 2rem 0;\n}\n@media (max-width: 580px) { .ap-stat-row { grid-template-columns: 1fr; } }\n.ap-stat-box {\n  background: var(--ap-navy);\n  border-radius: 10px;\n  padding: 1.5rem 1.2rem;\n  text-align: center;\n  box-shadow: 0 4px 16px rgba(26,35,64,0.18);\n}\n.ap-stat-val {\n  display: block;\n  font-family: \"Space Mono\", monospace;\n  font-size: 1.7rem;\n  font-weight: 700;\n  background: var(--ap-grad);\n  -webkit-background-clip: text;\n  -webkit-text-fill-color: transparent;\n  background-clip: 
text;\n  line-height: 1.2;\n  margin-bottom: 0.4rem;\n}\n.ap-stat-label {\n  font-size: 0.73rem;\n  color: rgba(255,255,255,0.5);\n  line-height: 1.5;\n}\n\n\/* \u2500\u2500 \u63a1\u7528CTA\u30d0\u30ca\u30fc \u2500\u2500 *\/\n.ap-cta {\n  background: var(--ap-grad-dk);\n  border-radius: 10px;\n  padding: 2rem 2.2rem;\n  margin: 3rem 0 1rem;\n  display: flex;\n  align-items: center;\n  justify-content: space-between;\n  gap: 1.5rem;\n  flex-wrap: wrap;\n  position: relative;\n  overflow: hidden;\n}\n.ap-cta::before {\n  content: '';\n  position: absolute;\n  top: -30px; right: -30px;\n  width: 160px; height: 160px;\n  background: radial-gradient(ellipse, rgba(232,80,10,0.22) 0%, transparent 65%);\n  pointer-events: none;\n}\n.ap-cta-text { position: relative; z-index: 1; }\n.ap-cta-text p { color: rgba(255,255,255,0.7); font-size: 0.88rem; margin: 0.3rem 0 0; }\n.ap-cta-text strong { color: #fff; font-size: 1.05rem; display: block; margin-bottom: 0.2rem; }\n.ap-cta-btn {\n  display: inline-block;\n  background: var(--ap-grad);\n  color: #fff;\n  font-weight: 700;\n  font-size: 0.88rem;\n  padding: 0.7rem 1.6rem;\n  border-radius: 6px;\n  text-decoration: none;\n  box-shadow: 0 4px 14px rgba(232,80,10,0.35);\n  transition: opacity 0.2s, transform 0.2s;\n  white-space: nowrap;\n  position: relative; z-index: 1;\n}\n.ap-cta-btn:hover { opacity: 0.88; transform: translateY(-1px); color: #fff; }\n\n\/* \u2500\u2500 \u53c2\u7167\u8ad6\u6587 \u2500\u2500 *\/\n.ap-ref {\n  background: var(--ap-bg);\n  border: 1px solid var(--ap-border);\n  border-left: 4px solid var(--ap-navy);\n  border-radius: 0 8px 8px 0;\n  padding: 1.2rem 1.5rem;\n  margin: 1.8rem 0;\n  font-size: 0.88rem;\n}\n.ap-ref-label {\n  font-family: \"Space Mono\", monospace;\n  font-size: 0.65rem;\n  font-weight: 700;\n  color: var(--ap-navy);\n  letter-spacing: 0.1em;\n  text-transform: uppercase;\n  display: block;\n  margin-bottom: 0.5rem;\n}\n.ap-ref p { margin: 0; color: var(--ap-dim); }\n\n\/* 
\u2500\u2500 \u30ec\u30b9\u30dd\u30f3\u30b7\u30d6\u8abf\u6574 \u2500\u2500 *\/\n@media (max-width: 640px) {\n  .ap-lead-block { padding: 1.5rem 1.3rem; }\n  .ap-card-grid  { grid-template-columns: 1fr; }\n  .ap-cta        { flex-direction: column; }\n  .ap-stat-row   { grid-template-columns: 1fr 1fr; }\n}\n<\/style>\n\n<!-- ======================================================\n     \u6295\u7a3f\u672c\u6587 \u2014 \u3053\u3053\u304b\u3089\u8cbc\u308a\u4ed8\u3051\n     ====================================================== -->\n<div class=\"ap-post\">\n\n  <!-- \u30bf\u30b0\u30fb\u30e1\u30bf\u60c5\u5831 -->\n  <div class=\"ap-meta\">\n    <span class=\"ap-tag ap-tag-ai\">AI \/ LLM<\/span>\n    <span class=\"ap-tag ap-tag-ml\">Machine Learning<\/span>\n    <span class=\"ap-tag ap-tag-ref\">\u8ad6\u6587\u89e3\u8aac: arXiv 2305.18290<\/span>\n    <span class=\"ap-date\">2026.02.17<\/span>\n  <\/div>\n\n\u682a\u5f0f\u4f1a\u793e\u30a2\u30d7\u30ea\u30ba\u30e0\u306e\u30c6\u30c3\u30af\u8a18\u4e8b\u306b\u306a\u308a\u307e\u3059\u3002\n\n  <!-- \u30ea\u30fc\u30c9\u30d6\u30ed\u30c3\u30af -->\n  <div class=\"ap-lead-block\">\n    <p>\n      \u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\uff08LLM\uff09\u306e\u7cbe\u5ea6\u3068\u5b89\u5168\u6027\u3092\u5411\u4e0a\u3055\u305b\u308b\u9375\u3068\u306a\u308b\u5f37\u5316\u5b66\u7fd2\uff08Reinforcement Learning\uff09\u3002ChatGPT\u3084Claude\u304c\u300c\u4eba\u9593\u3089\u3057\u3044\u300d\u5fdc\u7b54\u3092\u8fd4\u305b\u308b\u7406\u7531\u306f\u3053\u3053\u306b\u3042\u308a\u307e\u3059\u3002\u672c\u8a18\u4e8b\u3067\u306f\u3001RLHF\u306e\u57fa\u790e\u304b\u3089\u6700\u65b0\u624b\u6cd5\u307e\u3067\u3001Stanford\u5927\u5b66\u306e\u8ad6\u6587\uff08DPO \/ NeurIPS 2023\uff09\u3092\u4ea4\u3048\u3066\u30a8\u30f3\u30b8\u30cb\u30a2\u76ee\u7dda\u3067\u4e01\u5be7\u306b\u89e3\u8aac\u3057\u307e\u3059\u3002\n    <\/p>\n  <\/div>\n\n  <!-- \u76ee\u6b21 -->\n  <div class=\"ap-toc\">\n    <p class=\"ap-toc-title\">\u76ee\u6b21<\/p>\n    <ol>\n      <li><a 
href=\"#ap-s01\">\u305d\u3082\u305d\u3082\u300c\u5f37\u5316\u5b66\u7fd2\u300d\u3068\u306f\u4f55\u304b<\/a><\/li>\n      <li><a href=\"#ap-s02\">RLHF\u306e\u4ed5\u7d44\u307f\u3092\u7406\u89e3\u3059\u308b<\/a><\/li>\n      <li><a href=\"#ap-s03\">RLHF\u306e\u8ab2\u984c\u3068\u4ee3\u66ff\u30a2\u30d7\u30ed\u30fc\u30c1<\/a>\n        <ul>\n          <li class=\"sub\"><a href=\"#ap-s03a\">DPO\u306e\u6838\u5fc3\u7684\u30a2\u30a4\u30c7\u30a2<\/a><\/li>\n          <li class=\"sub\"><a href=\"#ap-s03b\">\u52fe\u914d\u304c\u610f\u5473\u3059\u308b\u3053\u3068<\/a><\/li>\n          <li class=\"sub\"><a href=\"#ap-s03c\">PPO\u3068\u306e\u5b9f\u9a13\u6bd4\u8f03<\/a><\/li>\n        <\/ul>\n      <\/li>\n      <li><a href=\"#ap-s04\">Constitutional AI \u3068 RLAIF<\/a><\/li>\n      <li><a href=\"#ap-s05\">\u5b9f\u88c5\u306b\u304a\u3051\u308b\u4e3b\u8981\u306a\u8ab2\u984c<\/a><\/li>\n      <li><a href=\"#ap-s06\">\u6700\u65b0\u30c8\u30ec\u30f3\u30c9\uff1a2025\u5e74\u306e\u52d5\u5411<\/a><\/li>\n      <li><a href=\"#ap-s07\">\u30a2\u30d7\u30ea\u30ba\u30e0\u306e\u53d6\u308a\u7d44\u307f\u3068\u4eca\u5f8c\u306e\u5c55\u671b<\/a><\/li>\n    <\/ol>\n  <\/div>\n\n\n  <!-- ============ Section 01 ============ -->\n  <div class=\"ap-section-head\" id=\"ap-s01\">\n    <span class=\"ap-section-num\">01<\/span>\n    <div class=\"ap-section-line\"><\/div>\n  <\/div>\n  <h2>\u305d\u3082\u305d\u3082\u300c\u5f37\u5316\u5b66\u7fd2\u300d\u3068\u306f\u4f55\u304b<\/h2>\n\n  <p>\u6a5f\u68b0\u5b66\u7fd2\u306e\u4e00\u5206\u91ce\u3067\u3042\u308b<strong>\u5f37\u5316\u5b66\u7fd2\uff08Reinforcement Learning: 
RL\uff09<\/strong>\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u74b0\u5883\u3068\u76f8\u4e92\u4f5c\u7528\u3057\u306a\u304c\u3089\u8a66\u884c\u932f\u8aa4\u3092\u7e70\u308a\u8fd4\u3057\u3001\u7d2f\u7a4d\u5831\u916c\u3092\u6700\u5927\u5316\u3059\u308b\u3088\u3046\u306a\u884c\u52d5\u65b9\u7b56\u3092\u5b66\u7fd2\u3059\u308b\u30a2\u30d7\u30ed\u30fc\u30c1\u3067\u3059\u3002\u30b2\u30fc\u30e0AI\u3084\u81ea\u5f8b\u30ed\u30dc\u30c3\u30c8\u5236\u5fa1\u306e\u5206\u91ce\u3067\u9577\u5e74\u7814\u7a76\u3055\u308c\u3066\u304d\u307e\u3057\u305f\u304c\u3001\u8fd1\u5e74\u306fLLM\u306e\u30d5\u30a1\u30a4\u30f3\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u306b\u5fdc\u7528\u3055\u308c\u3001\u305d\u306e\u91cd\u8981\u6027\u304c\u6025\u901f\u306b\u9ad8\u307e\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n  <p>\u5f37\u5316\u5b66\u7fd2\u306e\u57fa\u672c\u69cb\u9020\u306f\u300c<strong>\u72b6\u614b \u2192 \u884c\u52d5 \u2192 \u5831\u916c<\/strong>\u300d\u306e\u30eb\u30fc\u30d7\u3067\u3059\u3002LLM\u306b\u5f53\u3066\u306f\u3081\u308b\u3068\u3001\u30e6\u30fc\u30b6\u30fc\u306e\u5165\u529b\u30c6\u30ad\u30b9\u30c8\uff08\u72b6\u614b\uff09\u306b\u5bfe\u3057\u3066\u30c8\u30fc\u30af\u30f3\u3092\u751f\u6210\u3057\uff08\u884c\u52d5\uff09\u3001\u305d\u306e\u5fdc\u7b54\u54c1\u8cea\u306b\u5bfe\u3057\u3066\u30b9\u30b3\u30a2\u3092\u53d7\u3051\u53d6\u308b\uff08\u5831\u916c\uff09\u3068\u3044\u3046\u5f62\u5f0f\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n  <div class=\"ap-callout\">\n    <span class=\"ap-callout-label\">\ud83d\udca1 \u30dd\u30a4\u30f3\u30c8<\/span>\n    
<p>LLM\u306b\u5f37\u5316\u5b66\u7fd2\u3092\u9069\u7528\u3059\u308b\u76ee\u7684\u306f\u300c\u4eba\u9593\u306e\u4fa1\u5024\u89b3\u3084\u597d\u307f\u306b\u6cbf\u3063\u305f\u51fa\u529b\u3092\u751f\u6210\u3055\u305b\u308b\u3053\u3068\u300d\u3067\u3059\u3002\u4e8b\u524d\u5b66\u7fd2\uff08Pretraining\uff09\u3060\u3051\u3067\u306f\u3001\u6709\u5bb3\u306a\u30b3\u30f3\u30c6\u30f3\u30c4\u3084\u4e0d\u8aa0\u5b9f\u306a\u56de\u7b54\u304c\u751f\u6210\u3055\u308c\u308b\u30ea\u30b9\u30af\u304c\u3042\u308a\u3001\u3053\u308c\u3092\u6291\u5236\u3059\u308b\u306e\u304c\u5f37\u5316\u5b66\u7fd2\u306e\u5f79\u5272\u3067\u3059\u3002<\/p>\n  <\/div>\n\n\n  <!-- ============ Section 02 ============ -->\n  <div class=\"ap-section-head\" id=\"ap-s02\">\n    <span class=\"ap-section-num\">02<\/span>\n    <div class=\"ap-section-line\"><\/div>\n  <\/div>\n  <h2>RLHF\u306e\u4ed5\u7d44\u307f\u3092\u7406\u89e3\u3059\u308b<\/h2>\n\n  <p><strong>RLHF\uff08Reinforcement Learning from Human Feedback\uff09<\/strong>\u306f\u3001\u73fe\u5728\u6700\u3082\u5e83\u304f\u63a1\u7528\u3055\u308c\u3066\u3044\u308bLLM\u8abf\u6574\u624b\u6cd5\u3067\u3059\u3002OpenAI\u304cInstructGPT\u3067\u63d0\u5531\u3057\u3001ChatGPT\u3067\u3082\u63a1\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002RLHF\u306f\u4e3b\u306b3\u3064\u306e\u30d5\u30a7\u30fc\u30ba\u3067\u69cb\u6210\u3055\u308c\u307e\u3059\u3002<\/p>\n\n  <div class=\"ap-process\">\n    <div class=\"ap-step\">\n      <div class=\"ap-step-num\">01<\/div>\n      <div class=\"ap-step-body\">\n        <h4>\u6559\u5e2b\u3042\u308a\u5fae\u8abf\u6574\uff08SFT: Supervised Fine-Tuning\uff09<\/h4>\n        <p>\u4eba\u9593\u304c\u4f5c\u6210\u3057\u305f\u9ad8\u54c1\u8cea\u306a\uff08\u30d7\u30ed\u30f3\u30d7\u30c8, 
\u5fdc\u7b54\uff09\u30da\u30a2\u3092\u7528\u3044\u3066\u3001\u30d9\u30fc\u30b9\u30e2\u30c7\u30eb\u3092\u5fae\u8abf\u6574\u3057\u307e\u3059\u3002\u3053\u308c\u306b\u3088\u308a\u3001\u6307\u793a\u306b\u5f93\u3046\u57fa\u672c\u7684\u306a\u80fd\u529b\u3092\u7372\u5f97\u3055\u305b\u307e\u3059\u3002<\/p>\n      <\/div>\n    <\/div>\n    <div class=\"ap-step\">\n      <div class=\"ap-step-num\">02<\/div>\n      <div class=\"ap-step-body\">\n        <h4>\u5831\u916c\u30e2\u30c7\u30eb\u306e\u5b66\u7fd2\uff08Reward Model Training\uff09<\/h4>\n        <p>\u8907\u6570\u306e\u5fdc\u7b54\u5019\u88dc\u306b\u5bfe\u3057\u3066\u4eba\u9593\u306e\u30e9\u30f3\u30ab\u30fc\u304c\u512a\u52a3\u3092\u3064\u3051\u3001\u305d\u306e\u30c7\u30fc\u30bf\u3092\u5143\u306b\u300c\u3069\u306e\u5fdc\u7b54\u304c\u826f\u3044\u304b\u300d\u3092\u5224\u5b9a\u3059\u308b\u5831\u916c\u30e2\u30c7\u30eb\uff08RM\uff09\u3092\u5b66\u7fd2\u3055\u305b\u307e\u3059\u3002\u3053\u306eRM\u304cRL\u4e2d\u306e\u300c\u5be9\u5224\u300d\u3068\u3057\u3066\u6a5f\u80fd\u3057\u307e\u3059\u3002<\/p>\n      <\/div>\n    <\/div>\n    <div class=\"ap-step\">\n      <div class=\"ap-step-num\">03<\/div>\n      <div class=\"ap-step-body\">\n        <h4>PPO\u306b\u3088\u308b\u5f37\u5316\u5b66\u7fd2\uff08RL with PPO\uff09<\/h4>\n        <p>Proximal Policy Optimization\uff08PPO\uff09\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u7528\u3044\u3066\u3001\u5831\u916c\u30e2\u30c7\u30eb\u306e\u30b9\u30b3\u30a2\u3092\u6700\u5927\u5316\u3059\u308b\u3088\u3046\u306bSFT\u30e2\u30c7\u30eb\u3092\u66f4\u65b0\u3057\u307e\u3059\u3002KL\u30c0\u30a4\u30d0\u30fc\u30b8\u30a7\u30f3\u30b9\u5236\u7d04\u306b\u3088\u308a\u3001\u5143\u306e\u30e2\u30c7\u30eb\u304b\u3089\u306e\u904e\u5ea6\u306a\u9038\u8131\u3092\u9632\u304e\u307e\u3059\u3002<\/p>\n      <\/div>\n    <\/div>\n  <\/div>\n\n  <div class=\"ap-code-wrap\">\n    <div class=\"ap-code-head\">\n      <div class=\"ap-code-dots\"><span><\/span><span><\/span><span><\/span><\/div>\n      <span 
class=\"ap-code-lang\">PYTHON \u00b7 RLHF \u5831\u916c\u8a08\u7b97\uff08\u8ad6\u6587 Eq.3\uff09<\/span>\n    <\/div>\n    <pre><span class=\"ap-cm\"># RLHF \u306e RL \u30d5\u30a1\u30a4\u30f3\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u76ee\u6a19\uff08\u8ad6\u6587 Eq.3\uff09:\n# max_\u03c0\u03b8 E[r\u03d5(x,y)] - \u03b2\u00b7KL[\u03c0\u03b8(y|x) || \u03c0ref(y|x)]<\/span>\n<span class=\"ap-kw\">def<\/span> <span class=\"ap-fn\">compute_rlhf_reward<\/span>(response, prompt, reward_model, policy, ref_policy, beta=<span class=\"ap-num\">0.1<\/span>):\n    rm_score   = reward_model(prompt, response)\n    log_prob_new = policy.log_prob(response, prompt)\n    log_prob_ref = ref_policy.log_prob(response, prompt)\n    kl_penalty   = beta * (log_prob_new - log_prob_ref)\n    <span class=\"ap-kw\">return<\/span> rm_score - kl_penalty\n\n<span class=\"ap-kw\">for<\/span> step <span class=\"ap-kw\">in<\/span> <span class=\"ap-fn\">range<\/span>(training_steps):\n    prompts   = sample_batch(dataset)\n    responses = policy.generate(prompts)\n    rewards   = [<span class=\"ap-fn\">compute_rlhf_reward<\/span>(r, p, rm, policy, ref, beta=<span class=\"ap-num\">0.1<\/span>)\n                 <span class=\"ap-kw\">for<\/span> r, p <span class=\"ap-kw\">in<\/span> <span class=\"ap-fn\">zip<\/span>(responses, prompts)]\n    policy.<span class=\"ap-fn\">ppo_update<\/span>(prompts, responses, rewards)<\/pre>\n  <\/div>\n\n\n  <!-- ============ Section 03 ============ -->\n  <div class=\"ap-section-head\" id=\"ap-s03\">\n    <span class=\"ap-section-num\">03<\/span>\n    <div class=\"ap-section-line\"><\/div>\n  <\/div>\n  <h2>RLHF\u306e\u8ab2\u984c\u3068\u4ee3\u66ff\u30a2\u30d7\u30ed\u30fc\u30c1<\/h2>\n\n  
<p>RLHF\u306f\u5f37\u529b\u306a\u624b\u6cd5\u3067\u3059\u304c\u3001\u5b9f\u88c5\u306e\u8907\u96d1\u3055\u3084\u8a08\u7b97\u30b3\u30b9\u30c8\u9762\u3067\u306e\u8ab2\u984c\u3082\u3042\u308a\u307e\u3059\u3002\u7279\u306b<strong>PPO\u306e\u4e0d\u5b89\u5b9a\u6027<\/strong>\u3068<strong>\u5831\u916c\u30e2\u30c7\u30eb\u306e\u904e\u6700\u9069\u5316\uff08Reward Hacking\uff09<\/strong>\u306f\u5b9f\u52d9\u4e0a\u306e\u5927\u304d\u306a\u969c\u58c1\u3067\u3059\u3002PPO\u306f\u30d5\u30a1\u30a4\u30f3\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u4e2d\u306bLM\u304b\u3089\u306e\u30b5\u30f3\u30d7\u30ea\u30f3\u30b0\u3092\u30eb\u30fc\u30d7\u5185\u3067\u884c\u3046\u5fc5\u8981\u304c\u3042\u308a\u3001\u30dd\u30ea\u30b7\u30fc\u30fb\u5831\u916c\u30e2\u30c7\u30eb\u30fb\u53c2\u7167\u30e2\u30c7\u30eb\u30fb\u30af\u30ea\u30c6\u30a3\u30c3\u30af\u306e4\u30e2\u30c7\u30eb\u3092\u540c\u6642\u306b\u4fdd\u6301\u3059\u308b\u305f\u3081\u3001\u8a08\u7b97\u30b3\u30b9\u30c8\u304c\u8457\u3057\u304f\u81a8\u5927\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n  <div class=\"ap-card-grid\">\n    <div class=\"ap-card ap-card-a\">\n      <span class=\"ap-card-icon\">\ud83c\udfaf<\/span>\n      <h4>DPO <span class=\"ap-badge ap-badge-new\">NeurIPS 2023<\/span><\/h4>\n      <p>\u5831\u916c\u30e2\u30c7\u30eb\u3092\u967d\u306b\u5b66\u7fd2\u305b\u305a\u3001\u5358\u7d14\u306a\u4e8c\u5024\u30af\u30ed\u30b9\u30a8\u30f3\u30c8\u30ed\u30d4\u30fc\u640d\u5931\u3060\u3051\u3067\u30dd\u30ea\u30b7\u30fc\u3092\u76f4\u63a5\u6700\u9069\u5316\u3002PPO\u3068\u540c\u7b49\u4ee5\u4e0a\u306e\u6027\u80fd\u3092\u9054\u6210\u3002<\/p>\n    <\/div>\n    <div class=\"ap-card ap-card-b\">\n      <span class=\"ap-card-icon\">\ud83d\udd04<\/span>\n      <h4>RLHF (PPO) <span class=\"ap-badge ap-badge-old\">Classic<\/span><\/h4>\n      
<p>\u6700\u3082\u5b9f\u7e3e\u306e\u3042\u308b\u624b\u6cd5\u3002\u8907\u96d1\u3060\u304c\u67d4\u8edf\u6027\u304c\u9ad8\u304f\u3001\u5927\u898f\u6a21\u30e2\u30c7\u30eb\u3067\u306e\u6027\u80fd\u304c\u5b9f\u8a3c\u3055\u308c\u3066\u3044\u308b\u3002\u5831\u916c\u30e2\u30c7\u30eb\u5b66\u7fd2\u3068RL\u6700\u9069\u5316\u306e2\u6bb5\u968e\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u3002<\/p>\n    <\/div>\n    <div class=\"ap-card ap-card-c\">\n      <span class=\"ap-card-icon\">\u26a1<\/span>\n      <h4>RLAIF<\/h4>\n      <p>RL from AI Feedback\u3002\u4eba\u9593\u306e\u4ee3\u308f\u308a\u306bAI\uff08\u5225LLM\uff09\u304c\u5831\u916c\u30b7\u30b0\u30ca\u30eb\u3092\u63d0\u4f9b\u3002\u30b9\u30b1\u30fc\u30e9\u30d6\u30eb\u3067\u9ad8\u901f\u3002Constitutional AI\u3067\u3082\u63a1\u7528\u3002<\/p>\n    <\/div>\n  <\/div>\n\n  <h3 id=\"ap-s03a\">DPO\u306e\u6838\u5fc3\u7684\u30a2\u30a4\u30c7\u30a2<\/h3>\n  <p>2023\u5e74\u306bStanford\u5927\u5b66\u304cNeurIPS 2023\u3067\u767a\u8868\u3057\u305fDPO\uff08\u8ad6\u6587\uff1a<em>\u300cDirect Preference Optimization: Your Language Model is Secretly a Reward Model\u300d<\/em>\uff09\u306f\u3001LLM\u3078\u306e\u5f37\u5316\u5b66\u7fd2\u9069\u7528\u306b\u304a\u3051\u308b\u6839\u672c\u7684\u306a\u767a\u60f3\u306e\u8ee2\u63db\u3067\u3059\u3002\u30bf\u30a4\u30c8\u30eb\u306b\u3042\u308b\u901a\u308a\u3001<strong>\u300c\u8a00\u8a9e\u30e2\u30c7\u30eb\u81ea\u4f53\u304c\u6697\u9ed9\u7684\u306a\u5831\u916c\u30e2\u30c7\u30eb\u3067\u3042\u308b\u300d<\/strong>\u3068\u3044\u3046\u6d1e\u5bdf\u304c\u305d\u306e\u6838\u5fc3\u3067\u3059\u3002<\/p>\n\n  <p>RLHF\u3067\u306f\u300c\u5831\u916c\u30e2\u30c7\u30eb\u306e\u5b66\u7fd2 \u2192 
PPO\u306b\u3088\u308b\u30dd\u30ea\u30b7\u30fc\u6700\u9069\u5316\u300d\u3068\u3044\u30462\u6bb5\u968e\u304c\u5fc5\u8981\u3067\u3057\u305f\u3002DPO\u306fKL\u5236\u7d04\u4ed8\u304d\u5831\u916c\u6700\u5927\u5316\u306e\u6700\u9069\u89e3\u304c\u9589\u5f62\u5f0f\u3067\u5c0e\u51fa\u3067\u304d\u308b\u3068\u3044\u3046\u6570\u5b66\u7684\u4e8b\u5b9f\u3092\u6d3b\u7528\u3057\u3001\u3053\u306e2\u6bb5\u968e\u3092<strong>\u5358\u4e00\u306e\u5206\u985e\u640d\u5931<\/strong>\u306b\u5909\u63db\u3059\u308b\u3053\u3068\u306b\u6210\u529f\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n  <div class=\"ap-callout\">\n    <span class=\"ap-callout-label\">\ud83d\udd2c \u6570\u5b66\u7684\u76f4\u611f<\/span>\n    <p>Bradley-Terry \u30e2\u30c7\u30eb\u306f\u9078\u597d\u78ba\u7387\u3092\u5831\u916c\u5dee\u5206\u306e\u95a2\u6570\u3068\u3057\u3066\u5b9a\u7fa9\u3057\u307e\u3059\u3002DPO \u306f\u3053\u306e\u95a2\u4fc2\u3092\u9006\u7528\u3057\u3001\u5831\u916c\u95a2\u6570\u3092\u300c\u30dd\u30ea\u30b7\u30fc\u306e\u5bfe\u6570\u78ba\u7387\u6bd4 \u03b2\u00b7log(\u03c0(y|x) \/ \u03c0_ref(y|x))\u300d\u3068\u3057\u3066\u8868\u73fe\u3057\u307e\u3059\u3002\u3053\u306e\u5909\u6570\u5909\u63db\u306b\u3088\u308a\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u95a2\u6570\u304c\u6d88\u3048\u3001\u5831\u916c\u30e2\u30c7\u30eb\u3092\u967d\u306b\u5b66\u7fd2\u3059\u308b\u3053\u3068\u306a\u304f\u3001\u30dd\u30ea\u30b7\u30fc\u81ea\u4f53\u3092\u640d\u5931\u95a2\u6570\u306e\u5f15\u6570\u3068\u3057\u3066\u76f4\u63a5\u6700\u9069\u5316\u3067\u304d\u307e\u3059\u3002<\/p>\n  <\/div>\n\n  <div class=\"ap-code-wrap\">\n    <div class=\"ap-code-head\">\n      <div class=\"ap-code-dots\"><span><\/span><span><\/span><span><\/span><\/div>\n      <span class=\"ap-code-lang\">PYTHON \u00b7 DPO Loss \u2014 \u8ad6\u6587 Eq.(7)<\/span>\n    <\/div>\n    <pre><span class=\"ap-kw\">import<\/span> torch.nn.functional <span class=\"ap-kw\">as<\/span> F\n\n<span class=\"ap-kw\">def<\/span> <span class=\"ap-fn\">dpo_loss<\/span>(policy, ref_policy, 
chosen, rejected, prompt, beta=<span class=\"ap-num\">0.1<\/span>):\n    <span class=\"ap-cm\">\"\"\"\n    L_DPO = -E[log \u03c3(\u03b2\u00b7log(\u03c0_\u03b8(yw|x)\/\u03c0_ref(yw|x)) - \u03b2\u00b7log(\u03c0_\u03b8(yl|x)\/\u03c0_ref(yl|x)))]\n    chosen(yw): \u4eba\u9593\u304c\u597d\u3080\u5fdc\u7b54 \/ rejected(yl): \u4eba\u9593\u304c\u597d\u307e\u306a\u3044\u5fdc\u7b54\n    \"\"\"<\/span>\n    log_p_chosen   = policy.log_prob(chosen,   prompt)\n    log_p_rejected = policy.log_prob(rejected, prompt)\n    log_r_chosen   = ref_policy.log_prob(chosen,   prompt)\n    log_r_rejected = ref_policy.log_prob(rejected, prompt)\n\n    <span class=\"ap-cm\"># \u6697\u9ed9\u306e\u5831\u916c\u5dee\u5206: r\u0302(x,yw) - r\u0302(x,yl)<\/span>\n    logits = beta * (\n        (log_p_chosen   - log_r_chosen) -\n        (log_p_rejected - log_r_rejected)\n    )\n    <span class=\"ap-kw\">return<\/span> -F.<span class=\"ap-fn\">logsigmoid<\/span>(logits).<span class=\"ap-fn\">mean<\/span>()<\/pre>\n  <\/div>\n\n  <h3 id=\"ap-s03b\">DPO \u306e\u52fe\u914d\u304c\u610f\u5473\u3059\u308b\u3053\u3068<\/h3>\n  <p>\u8ad6\u6587\u3067\u306fDPO\u306e\u640d\u5931\u95a2\u6570\u306e\u52fe\u914d\u3092\u89e3\u6790\u7684\u306b\u5206\u89e3\u3057\u3066\u304a\u308a\u3001\u305d\u306e\u6319\u52d5\u304c\u660e\u5feb\u306b\u7406\u89e3\u3067\u304d\u307e\u3059\u3002\u52fe\u914d\u306f\u5927\u304d\u304f2\u9805\u304b\u3089\u6210\u308a\u307e\u3059\u3002\u7b2c1\u9805\u306f<strong>\u597d\u307e\u308c\u308b\u5fdc\u7b54 y_w \u306e\u5c24\u5ea6\u3092\u4e0a\u3052\u308b\u65b9\u5411<\/strong>\u3001\u7b2c2\u9805\u306f<strong>\u597d\u307e\u308c\u306a\u3044\u5fdc\u7b54 y_l \u306e\u5c24\u5ea6\u3092\u4e0b\u3052\u308b\u65b9\u5411<\/strong>\u3067\u3059\u3002\u91cd\u8981\u306a\u306e\u306f\u5404\u30b5\u30f3\u30d7\u30eb\u306b\u4ed8\u4e0e\u3055\u308c\u308b\u91cd\u307f \u03c3(r\u0302(y_l) \u2212 r\u0302(y_w)) 
\u3067\u3001<strong>\u6697\u9ed9\u306e\u5831\u916c\u30e2\u30c7\u30eb\u304c\u9078\u597d\u9806\u5e8f\u3092\u8aa4\u3063\u3066\u63a8\u5b9a\u3057\u3066\u3044\u308b\u30b5\u30f3\u30d7\u30eb\u307b\u3069\u3001\u5927\u304d\u306a\u91cd\u307f\u3067\u66f4\u65b0\u3055\u308c\u308b<\/strong>\u4ed5\u7d44\u307f\u3067\u3059\u3002\u3053\u306e\u52d5\u7684\u306a\u91cd\u307f\u3065\u3051\u304c\u3001\u7d20\u6734\u306a\u78ba\u7387\u6bd4\u6700\u9069\u5316\uff08Unlikelihood\u6cd5\uff09\u3067\u8d77\u304d\u308b\u30e2\u30c7\u30eb\u52a3\u5316\u3092\u9632\u3050\u9375\u3067\u3059\u3002<\/p>\n\n  <h3 id=\"ap-s03c\">PPO \u3068\u306e\u5b9f\u9a13\u6bd4\u8f03\uff08\u8ad6\u6587\u3088\u308a\uff09<\/h3>\n  <p>\u8ad6\u6587\u3067\u306f\u611f\u60c5\u751f\u6210\u30fb\u8981\u7d04\u30fb\u5bfe\u8a71\u306e3\u30bf\u30b9\u30af\u3067\u8a55\u4fa1\u304c\u884c\u308f\u308c\u3066\u3044\u307e\u3059\u3002\u611f\u60c5\u751f\u6210\uff08IMDb\uff09\u3067\u306f\u3001DPO\u306f\u3059\u3079\u3066\u306eKL\u5024\u306b\u304a\u3044\u3066PPO\u3088\u308a\u3082\u9ad8\u3044\u671f\u5f85\u5831\u916c\u3092\u9054\u6210\u3057\u3001<strong>\u5831\u916c\/KL\u30c8\u30ec\u30fc\u30c9\u30aa\u30d5\u306e\u30d5\u30ed\u30f3\u30c6\u30a3\u30a2\u3067PPO\u3092\u4e0a\u56de\u308a\u307e\u3057\u305f<\/strong>\u3002\u8981\u7d04\u30bf\u30b9\u30af\uff08Reddit TL;DR\uff09\u3067\u306f\u6e29\u5ea60\u3067\u306eDPO\u306e\u52dd\u7387\u306f<strong>\u7d0461%<\/strong>\u3067PPO\u306e57%\u3092\u8d85\u3048\u3001\u3055\u3089\u306bDPO\u306f\u30b5\u30f3\u30d7\u30ea\u30f3\u30b0\u6e29\u5ea6\u306e\u5909\u5316\u306b\u5bfe\u3057\u3066PPO\u3088\u308a\u9811\u5065\u3067\u3057\u305f\u3002\u5bfe\u8a71\u30bf\u30b9\u30af\uff08Anthropic HH\uff09\u3067\u306f\u3001DPO\u306f\u9078\u597d\u6e08\u307f\u5fdc\u7b54\u3088\u308a\u9ad8\u3044\u30b9\u30b3\u30a2\u3092\u9054\u6210\u3057\u305f\u552f\u4e00\u306e\u8a08\u7b97\u52b9\u7387\u7684\u306a\u624b\u6cd5\u3067\u3057\u305f\u3002<\/p>\n\n  <div class=\"ap-table-wrap\">\n    <table class=\"ap-table\">\n      <thead>\n        <tr>\n          
<th>\u624b\u6cd5<\/th>\n          <th>\u5b66\u7fd2\u306e\u5b89\u5b9a\u6027<\/th>\n          <th>\u8a08\u7b97\u30b3\u30b9\u30c8<\/th>\n          <th>\u5b9f\u88c5\u96e3\u6613\u5ea6<\/th>\n          <th>\u4e3b\u306a\u63a1\u7528\u4f8b<\/th>\n        <\/tr>\n      <\/thead>\n      <tbody>\n        <tr>\n          <td>RLHF (PPO)<\/td>\n          <td>\u25b3 \u3084\u3084\u4e0d\u5b89\u5b9a<\/td>\n          <td>\u9ad8\u3044\uff084\u30e2\u30c7\u30eb\u540c\u6642\uff09<\/td>\n          <td>\u9ad8\u3044<\/td>\n          <td>ChatGPT, InstructGPT<\/td>\n        <\/tr>\n        <tr>\n          <td>DPO<\/td>\n          <td>\u25ef \u5b89\u5b9a<\/td>\n          <td>\u4f4e\u301c\u4e2d\uff08SFT\u3068\u540c\u7b49\uff09<\/td>\n          <td>\u4f4e\u3044<\/td>\n          <td>Llama 3, Zephyr, Mistral<\/td>\n        <\/tr>\n        <tr>\n          <td>RLAIF<\/td>\n          <td>\u25ef \u5b89\u5b9a<\/td>\n          <td>\u4e2d<\/td>\n          <td>\u4e2d<\/td>\n          <td>Claude (Constitutional AI)<\/td>\n        <\/tr>\n        <tr>\n          <td>IPO<\/td>\n          <td>\u25ce \u975e\u5e38\u306b\u5b89\u5b9a<\/td>\n          <td>\u4f4e\u3044<\/td>\n          <td>\u4f4e\u3044<\/td>\n          <td>\u7814\u7a76\u6bb5\u968e<\/td>\n        <\/tr>\n      <\/tbody>\n    <\/table>\n  <\/div>\n\n  <div class=\"ap-ref\">\n    <span class=\"ap-ref-label\">\ud83d\udcc4 \u53c2\u7167\u8ad6\u6587<\/span>\n    <p>Rafailov et al., <em>&#8220;Direct Preference Optimization: Your Language Model is Secretly a Reward Model&#8221;<\/em>, NeurIPS 2023.\uff08Stanford University\uff09\u3000arxiv: <strong>2305.18290<\/strong><\/p>\n  <\/div>\n\n\n  <!-- ============ Section 04 ============ -->\n  <div class=\"ap-section-head\" id=\"ap-s04\">\n    <span class=\"ap-section-num\">04<\/span>\n    <div class=\"ap-section-line\"><\/div>\n  <\/div>\n  <h2>Constitutional AI \u3068 RLAIF<\/h2>\n\n  <p>Anthropic\u304c\u63d0\u5531\u3059\u308b<strong>Constitutional 
AI\uff08CAI\uff09<\/strong>\u306f\u3001\u300c\u539f\u5247\uff08Constitution\uff09\u300d\u3068\u547c\u3070\u308c\u308b\u30eb\u30fc\u30eb\u30bb\u30c3\u30c8\u3092\u4f7f\u3063\u3066AI\u81ea\u8eab\u306b\u81ea\u5df1\u6279\u5224\u30fb\u4fee\u6b63\u3092\u884c\u308f\u305b\u3001\u5b89\u5168\u3067\u6709\u7528\u306a\u5fdc\u7b54\u3092\u5f15\u304d\u51fa\u3059\u624b\u6cd5\u3067\u3059\u3002RLAIF\u3067\u306f\u3001\u4eba\u9593\u306e\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u306e\u4ee3\u308f\u308a\u306bAI\u30e2\u30c7\u30eb\u304c\u300c\u3069\u3061\u3089\u306e\u5fdc\u7b54\u304c\u512a\u308c\u3066\u3044\u308b\u304b\u300d\u3092\u8a55\u4fa1\u3057\u3001\u305d\u308c\u3092\u30b7\u30b0\u30ca\u30eb\u3068\u3057\u3066\u5f37\u5316\u5b66\u7fd2\u3092\u884c\u3044\u307e\u3059\u3002<\/p>\n\n  <div class=\"ap-callout\">\n    <span class=\"ap-callout-label\">\ud83c\udfdb\ufe0f Constitutional AI \u306e\u539f\u5247\u4f8b<\/span>\n    <p>\u300c\u6709\u5bb3\u30fb\u5371\u967a\u30fb\u975e\u502b\u7406\u7684\u30fb\u9055\u6cd5\u306a\u30b3\u30f3\u30c6\u30f3\u30c4\u3092\u542b\u3080\u5fdc\u7b54\u3092\u9078\u3070\u306a\u3044\u3067\u304f\u3060\u3055\u3044\u300d\u300c\u4eba\u9593\u306e\u81ea\u5f8b\u6027\u3092\u5c0a\u91cd\u3057\u3001\u30e6\u30fc\u30b6\u30fc\u304c\u81ea\u3089\u5224\u65ad\u3067\u304d\u308b\u3088\u3046\u652f\u63f4\u3057\u3066\u304f\u3060\u3055\u3044\u300d\u300c\u4e0d\u78ba\u304b\u306a\u60c5\u5831\u306b\u3064\u3044\u3066\u306f\u3001\u78ba\u4fe1\u3092\u6301\u3063\u3066\u3044\u308b\u3088\u3046\u306b\u898b\u305b\u304b\u3051\u306a\u3044\u3067\u304f\u3060\u3055\u3044\u300d<\/p>\n  <\/div>\n\n  
<p>\u3053\u306e\u30a2\u30d7\u30ed\u30fc\u30c1\u306e\u6700\u5927\u306e\u5229\u70b9\u306f<strong>\u30b9\u30b1\u30fc\u30e9\u30d3\u30ea\u30c6\u30a3<\/strong>\u3067\u3059\u3002\u4eba\u9593\u306e\u30a2\u30ce\u30c6\u30fc\u30bf\u30fc\u306e\u30dc\u30c8\u30eb\u30cd\u30c3\u30af\u3092\u89e3\u6d88\u3057\u3064\u3064\u3001\u6570\u4e07\u301c\u6570\u767e\u4e07\u4ef6\u306e\u9078\u597d\u30c7\u30fc\u30bf\u3092\u81ea\u52d5\u751f\u6210\u3067\u304d\u307e\u3059\u3002\u305f\u3060\u3057\u3001AI\u306b\u3088\u308b\u8a55\u4fa1\u304c\u30d9\u30fc\u30b9\u30e2\u30c7\u30eb\u306e\u30d0\u30a4\u30a2\u30b9\u3092\u5f15\u304d\u7d99\u3050\u30ea\u30b9\u30af\u3082\u5185\u5305\u3057\u3066\u304a\u308a\u3001\u9069\u5207\u306a\u8a2d\u8a08\u3068\u691c\u8a3c\u304c\u4e0d\u53ef\u6b20\u3067\u3059\u3002<\/p>\n\n\n  <!-- ============ Section 05 ============ -->\n  <div class=\"ap-section-head\" id=\"ap-s05\">\n    <span class=\"ap-section-num\">05<\/span>\n    <div class=\"ap-section-line\"><\/div>\n  <\/div>\n  <h2>\u5b9f\u88c5\u306b\u304a\u3051\u308b\u4e3b\u8981\u306a\u8ab2\u984c<\/h2>\n\n  <div class=\"ap-challenges\">\n    <div class=\"ap-challenge\">\n      <div class=\"ap-challenge-icon\">\u26a0\ufe0f<\/div>\n      <div>\n        <h4>Reward Hacking\uff08\u5831\u916c\u30cf\u30c3\u30ad\u30f3\u30b0\uff09<\/h4>\n        <p>\u30e2\u30c7\u30eb\u304c\u5831\u916c\u30e2\u30c7\u30eb\u306e\u300c\u7a74\u300d\u3092\u7a81\u304d\u3001\u5b9f\u969b\u306b\u306f\u54c1\u8cea\u304c\u4f4e\u3044\u306b\u3082\u304b\u304b\u308f\u3089\u305a\u9ad8\u30b9\u30b3\u30a2\u3092\u5f97\u308b\u5fdc\u7b54\u3092\u751f\u6210\u3059\u308b\u73fe\u8c61\u3002\u4f8b\uff1a\u9577\u3044\u5fdc\u7b54\u3092\u751f\u6210\u3059\u308b\u307b\u3069\u9ad8\u30b9\u30b3\u30a2\u306b\u306a\u308b\u5831\u916c\u30e2\u30c7\u30eb\u306b\u5bfe\u3057\u3066\u30dd\u30ea\u30b7\u30fc\u3092\u904e\u5270\u306b\u6700\u9069\u5316\u3059\u308b\u3068\u3001\u5197\u9577\u306a\u51fa\u529b\u304c\u5897\u52a0\u3059\u308b\u3002<\/p>\n      <\/div>\n    <\/div>\n    <div class=\"ap-challenge\">\n      <div class=\"ap-challenge-icon\">\ud83d\udcca<\/div>\n      
<div>\n        <h4>\u30a2\u30ce\u30c6\u30fc\u30b7\u30e7\u30f3\u306e\u54c1\u8cea\u3068\u4e00\u8cab\u6027<\/h4>\n        <p>\u4eba\u9593\u306e\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u306f\u4e3b\u89b3\u7684\u3067\u3042\u308a\u3001\u30a2\u30ce\u30c6\u30fc\u30bf\u30fc\u9593\u306e\u4e00\u81f4\u5ea6\uff08Inter-annotator Agreement\uff09\u304c\u4f4e\u3044\u5834\u5408\u3001\u5831\u916c\u30e2\u30c7\u30eb\u304c\u77db\u76fe\u3057\u305f\u30b7\u30b0\u30ca\u30eb\u3092\u5b66\u7fd2\u3057\u3066\u3057\u307e\u3046\u3002\u660e\u78ba\u306a\u30ac\u30a4\u30c9\u30e9\u30a4\u30f3\u3068\u30c0\u30d6\u30eb\u30c1\u30a7\u30c3\u30af\u304c\u5fc5\u8981\u3002<\/p>\n      <\/div>\n    <\/div>\n    <div class=\"ap-challenge\">\n      <div class=\"ap-challenge-icon\">\ud83d\udcbe<\/div>\n      <div>\n        <h4>\u8a08\u7b97\u8cc7\u6e90\u3068\u30a4\u30f3\u30d5\u30e9\u30b3\u30b9\u30c8<\/h4>\n        <p>PPO\u30d9\u30fc\u30b9\u306eRLHF\u306f\u3001\u30dd\u30ea\u30b7\u30fc\u30e2\u30c7\u30eb\u30fb\u5831\u916c\u30e2\u30c7\u30eb\u30fb\u53c2\u7167\u30e2\u30c7\u30eb\u30fb\u30af\u30ea\u30c6\u30a3\u30c3\u30af\u3092\u540c\u6642\u306b\u4fdd\u6301\u30fb\u66f4\u65b0\u3059\u308b\u305f\u3081\u3001\u30e1\u30e2\u30ea\u4f7f\u7528\u91cf\u304c\u81a8\u5927\u300270B\u30d1\u30e9\u30e1\u30fc\u30bf\u306e\u30e2\u30c7\u30eb\u3067\u306f\u8907\u6570\u306eA100\/H100\u304c\u5fc5\u8981\u3002<\/p>\n      <\/div>\n    <\/div>\n    <div class=\"ap-challenge\">\n      <div class=\"ap-challenge-icon\">\ud83d\udd04<\/div>\n      <div>\n        <h4>Distribution Shift\uff08\u5206\u5e03\u30b7\u30d5\u30c8\uff09<\/h4>\n        
<p>RL\u306e\u5b66\u7fd2\u304c\u9032\u3080\u306b\u3064\u308c\u3066\u3001\u30dd\u30ea\u30b7\u30fc\u304c\u751f\u6210\u3059\u308b\u5fdc\u7b54\u306e\u5206\u5e03\u304c\u3001\u5831\u916c\u30e2\u30c7\u30eb\u306e\u5b66\u7fd2\u30c7\u30fc\u30bf\u306e\u5206\u5e03\u304b\u3089\u4e56\u96e2\u3057\u3066\u3044\u304f\u554f\u984c\u3002\u5b9a\u671f\u7684\u306a\u5831\u916c\u30e2\u30c7\u30eb\u306e\u518d\u5b66\u7fd2\u3084\u30aa\u30f3\u30e9\u30a4\u30f3\u5b66\u7fd2\u3067\u5bfe\u5fdc\u3059\u308b\u5fc5\u8981\u304c\u3042\u308b\u3002<\/p>\n      <\/div>\n    <\/div>\n  <\/div>\n\n\n  <!-- ============ Section 06 ============ -->\n  <div class=\"ap-section-head\" id=\"ap-s06\">\n    <span class=\"ap-section-num\">06<\/span>\n    <div class=\"ap-section-line\"><\/div>\n  <\/div>\n  <h2>\u6700\u65b0\u30c8\u30ec\u30f3\u30c9\uff1a2025\u5e74\u306e\u52d5\u5411<\/h2>\n\n  <div class=\"ap-stat-row\">\n    <div class=\"ap-stat-box\">\n      <span class=\"ap-stat-val\">GRPO<\/span>\n      <span class=\"ap-stat-label\">Group Relative Policy Optimization<br>\u30af\u30ea\u30c6\u30a3\u30c3\u30af\u4e0d\u8981\u3067\u52b9\u7387\u5316<\/span>\n    <\/div>\n    <div class=\"ap-stat-box\">\n      <span class=\"ap-stat-val\">SimPO<\/span>\n      <span class=\"ap-stat-label\">Simple Preference Optimization<br>\u53c2\u7167\u30e2\u30c7\u30eb\u4e0d\u8981\u306e\u8efd\u91cf\u624b\u6cd5<\/span>\n    <\/div>\n    <div class=\"ap-stat-box\">\n      <span class=\"ap-stat-val\">RLVR<\/span>\n      <span class=\"ap-stat-label\">RL with Verifiable Rewards<br>\u691c\u8a3c\u53ef\u80fd\u30bf\u30b9\u30af\u3078\u306e\u5fdc\u7528<\/span>\n    <\/div>\n  <\/div>\n\n  <h3>GRPO\uff08Group Relative Policy Optimization\uff09<\/h3>\n  
<p>DeepSeek\u304c\u63a1\u7528\u3057\u305f\u3053\u3068\u3067\u6ce8\u76ee\u3092\u96c6\u3081\u305fGRPO\u306f\u3001PPO\u306e\u30af\u30ea\u30c6\u30a3\u30c3\u30af\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u3092\u6392\u9664\u3057\u3001\u30b0\u30eb\u30fc\u30d7\u5185\u306e\u76f8\u5bfe\u7684\u306a\u5831\u916c\u3092\u4f7f\u3063\u3066\u30dd\u30ea\u30b7\u30fc\u3092\u66f4\u65b0\u3057\u307e\u3059\u3002\u540c\u4e00\u30d7\u30ed\u30f3\u30d7\u30c8\u304b\u3089\u8907\u6570\u306e\u5fdc\u7b54\u3092\u30b5\u30f3\u30d7\u30ea\u30f3\u30b0\u3057\u3001\u30b0\u30eb\u30fc\u30d7\u5185\u3067\u306e\u76f8\u5bfe\u7684\u306a\u512a\u52a3\u3092\u5b66\u7fd2\u30b7\u30b0\u30ca\u30eb\u3068\u3059\u308b\u305f\u3081\u3001<strong>\u30e1\u30e2\u30ea\u52b9\u7387\u304c\u5927\u5e45\u306b\u5411\u4e0a<\/strong>\u3057\u307e\u3059\u3002<\/p>\n\n  <h3>RLVR\uff08RL with Verifiable Rewards\uff09<\/h3>\n  <p>\u6570\u5b66\u554f\u984c\u306e\u6b63\u7b54\u3001\u30b3\u30fc\u30c9\u306e\u30c6\u30b9\u30c8\u30d1\u30b9\u3001\u8ad6\u7406\u7684\u63a8\u8ad6\u306a\u3069\u3001<strong>\u5ba2\u89b3\u7684\u306b\u691c\u8a3c\u53ef\u80fd\u306a\u30bf\u30b9\u30af<\/strong>\u306b\u304a\u3051\u308b\u5f37\u5316\u5b66\u7fd2\u30a2\u30d7\u30ed\u30fc\u30c1\u3067\u3059\u3002\u4eba\u9593\u3084\u5225AI\u306e\u4e3b\u89b3\u7684\u8a55\u4fa1\u3092\u5fc5\u8981\u3068\u305b\u305a\u3001\u81ea\u52d5\u7684\u306b\u5831\u916c\u3092\u8a08\u7b97\u3067\u304d\u307e\u3059\u3002DeepSeek-R1\u3084QwQ-32B\u306a\u3069\u306e\u63a8\u8ad6\u7279\u5316\u578b\u30e2\u30c7\u30eb\u3067\u63a1\u7528\u3055\u308c\u3066\u304a\u308a\u3001Chain-of-Thought\uff08CoT\uff09\u80fd\u529b\u306e\u98db\u8e8d\u7684\u306a\u5411\u4e0a\u304c\u5831\u544a\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n  <!-- ============ Section 07 ============ -->\n  <div class=\"ap-section-head\" id=\"ap-s07\">\n    <span class=\"ap-section-num\">07<\/span>\n    <div class=\"ap-section-line\"><\/div>\n  <\/div>\n  <h2>\u30a2\u30d7\u30ea\u30ba\u30e0\u306e\u53d6\u308a\u7d44\u307f\u3068\u4eca\u5f8c\u306e\u5c55\u671b<\/h2>\n\n  
<p>\u682a\u5f0f\u4f1a\u793e\u30a2\u30d7\u30ea\u30ba\u30e0\u3067\u306f\u3001LLM\u3092\u6d3b\u7528\u3057\u305f\u30d7\u30ed\u30c0\u30af\u30c8\u958b\u767a\u306b\u304a\u3044\u3066\u3001\u30c9\u30e1\u30a4\u30f3\u7279\u5316\u306e\u5f37\u5316\u5b66\u7fd2\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u69cb\u7bc9\u3092\u7a4d\u6975\u7684\u306b\u63a2\u6c42\u3057\u3066\u3044\u307e\u3059\u3002\u7279\u306b<strong>DPO\u306b\u3088\u308b\u4f4e\u30b3\u30b9\u30c8\u30fb\u9ad8\u54c1\u8cea\u306a\u7279\u5316\u30e2\u30c7\u30eb\u306e\u69cb\u7bc9<\/strong>\u3068\u3001<strong>RLAIF \u3092\u6d3b\u7528\u3057\u305f\u30b9\u30b1\u30fc\u30e9\u30d6\u30eb\u306a\u30c7\u30fc\u30bf\u751f\u6210<\/strong>\u306b\u6ce8\u529b\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n  <p>\u5f37\u5316\u5b66\u7fd2\u306f\u300c\u5b8c\u6210\u3057\u305f\u6280\u8853\u300d\u3067\u306f\u306a\u304f\u3001\u73fe\u5728\u9032\u884c\u5f62\u3067\u6025\u901f\u306b\u767a\u5c55\u3057\u3066\u3044\u307e\u3059\u3002GRPO\u3084RLVR\u306e\u3088\u3046\u306a\u65b0\u624b\u6cd5\u304c\u6b21\u3005\u3068\u767b\u5834\u3057\u3001\u3088\u308a\u5c11\u306a\u3044\u8a08\u7b97\u30b3\u30b9\u30c8\u3067\u3001\u3088\u308a\u9ad8\u3044\u54c1\u8cea\u306e\u30e2\u30c7\u30eb\u3092\u4f5c\u308c\u308b\u6642\u4ee3\u304c\u6765\u3066\u3044\u307e\u3059\u3002\u81ea\u793e\u30d7\u30ed\u30c0\u30af\u30c8\u306bLLM\u3092\u7d44\u307f\u8fbc\u3080\u969b\u306f\u3001\u3053\u3046\u3057\u305f\u6700\u65b0\u52d5\u5411\u3092\u5e38\u306b\u30a6\u30a9\u30c3\u30c1\u3057\u306a\u304c\u3089\u3001\u6700\u9069\u306a\u624b\u6cd5\u3092\u9078\u629e\u3059\u308b\u3053\u3068\u304c\u91cd\u8981\u3067\u3059\u3002<\/p>\n\n<\/div>\n\n","protected":false},"excerpt":{"rendered":"<p>AI \/ LLM Machine Learning \u8ad6\u6587\u89e3\u8aac: arXiv 2305.18290 2025.02.17 \u682a\u5f0f\u4f1a\u793e\u30a2\u30d7\u30ea\u30ba\u30e0\u306e\u30c6\u30c3\u30af\u8a18\u4e8b\u306b\u306a\u308a\u307e\u3059\u3002 
\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\uff08LLM\uff09\u306e\u7cbe\u5ea6\u3068\u5b89\u5168\u6027\u3092\u5411\u4e0a\u3055\u305b\u308b\u9375\u3068 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":403,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":"","wp-seo-meta-description":"","wp-seo-meta-robots":[]},"categories":[1],"tags":[],"class_list":{"0":"post-488","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"has-post-thumbnail","7":"category-news","8":"c-entry"},"_links":{"self":[{"href":"https:\/\/apprhythm.net\/index.php?rest_route=\/wp\/v2\/posts\/488","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/apprhythm.net\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/apprhythm.net\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/apprhythm.net\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/apprhythm.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=488"}],"version-history":[{"count":3,"href":"https:\/\/apprhythm.net\/index.php?rest_route=\/wp\/v2\/posts\/488\/revisions"}],"predecessor-version":[{"id":491,"href":"https:\/\/apprhythm.net\/index.php?rest_route=\/wp\/v2\/posts\/488\/revisions\/491"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/apprhythm.net\/index.php?rest_route=\/wp\/v2\/media\/403"}],"wp:attachment":[{"href":"https:\/\/apprhythm.net\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=488"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/apprhythm.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=488"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/apprhythm.net\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=488"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}