<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>nevertheless</title>
    <link>https://orchidbyw1.tistory.com/</link>
    <description></description>
    <language>ko</language>
    <pubDate>Mon, 6 Apr 2026 19:54:29 +0900</pubDate>
    <generator>TISTORY</generator>
    <ttl>100</ttl>
    <managingEditor>우연입니다</managingEditor>
    <image>
      <title>nevertheless</title>
      <url>https://tistory1.daumcdn.net/tistory/5498263/attach/434cbfef6cb64a5a8cd3d43248c1c5d0</url>
      <link>https://orchidbyw1.tistory.com</link>
    </image>
    <item>
      <title>Block Computation Comparison: VGGNet, ResNet, MobileNetV2</title>
      <link>https://orchidbyw1.tistory.com/10</link>
      <description>&lt;h2 data-ke-size=&quot;size26&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Standard Convolution MACs&lt;/span&gt;&lt;/h2&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Input feature map size = $H \times W$&lt;/li&gt;
&lt;li&gt;Input channels = $C_{in}$&lt;/li&gt;
&lt;li&gt;Output channels = $C_{out}$&lt;/li&gt;
&lt;li&gt;Kernel size = $K \times K$&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\therefore$ Total MACs = $H \times W \times C_{in} \times C_{out} \times K \times K$&lt;/p&gt;
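&lt;p data-ke-size=&quot;size16&quot;&gt;As a minimal Python sketch of this formula (assuming stride 1 and 'same' padding, so the output keeps the $H \times W$ spatial size; the helper name conv_macs is just illustrative):&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;def conv_macs(h, w, c_in, c_out, k):
    # each of the h*w output positions and c_out output channels
    # accumulates c_in * k * k multiplications
    return h * w * c_in * c_out * k * k

# example: 32x32 input, 32 -&amp;gt; 64 channels, 3x3 kernel
print(conv_macs(32, 32, 32, 64, 3))  # 18874368&lt;/code&gt;&lt;/pre&gt;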
&lt;h2 data-ke-size=&quot;size26&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;VGGNet&lt;/span&gt;&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Every layer is a standard conv.&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Kernel size = $3 \times 3$&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Total MACs = $H \times W \times C_{in} \times C_{out} \times 3 \times 3$&lt;/p&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;ResNet&lt;/span&gt;&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Bottleneck&lt;/b&gt; -&amp;gt; reduce the channels, then expand them back (see the sketch after the list).&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;1x1 Conv&lt;/b&gt; : $H \times W \times C_{in} \times C_{mid} \times 1 \times 1$ (reduce channels)&lt;/li&gt;
&lt;li&gt;&lt;b&gt;3x3 Conv&lt;/b&gt; : $H \times W \times C_{mid} \times C_{mid} \times 3 \times 3$&lt;/li&gt;
&lt;li&gt;&lt;b&gt;1x1 Conv&lt;/b&gt; : $H \times W \times C_{mid} \times C_{out} \times 1 \times 1$ (expand channels back)&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Shortcut 1x1 Conv&lt;/b&gt; = $H \times W \times C_{in} \times C_{out} \times 1 \times 1$&lt;/li&gt;
&lt;/ul&gt;
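&lt;p data-ke-size=&quot;size16&quot;&gt;A sketch of the bottleneck total, reusing the conv_macs helper defined above ($C_{mid}$ is the reduced middle width; the function name is just illustrative):&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;def resnet_bottleneck_macs(h, w, c_in, c_mid, c_out):
    return (conv_macs(h, w, c_in, c_mid, 1)    # 1x1 reduce
          + conv_macs(h, w, c_mid, c_mid, 3)   # 3x3
          + conv_macs(h, w, c_mid, c_out, 1)   # 1x1 expand back
          + conv_macs(h, w, c_in, c_out, 1))   # 1x1 projection shortcut

print(resnet_bottleneck_macs(32, 32, 32, 16, 64))  # 6029312&lt;/code&gt;&lt;/pre&gt;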
&lt;h2 data-ke-size=&quot;size26&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;MobileNetV2&lt;/span&gt;&lt;/h2&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Depthwise Separable Convolution&lt;/li&gt;
&lt;li&gt;Inverted residual (Narrow -&amp;gt; Wide -&amp;gt; Narrow)&lt;/li&gt;
&lt;li&gt;expansion ratio t -&amp;gt; channels after expansion : $t \cdot C_{in}$&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;(1) &lt;b&gt;expansion 1x1 conv&lt;/b&gt; : $H \times W \times C_{in} \times t \cdot C_{in} \times 1 \times 1$&lt;br /&gt;(2) &lt;b&gt;depthwise 3x3 conv&lt;/b&gt; : $H \times W \times t \cdot C_{in} \times 3 \times 3$&lt;br /&gt;-&amp;gt; 3x3 applied to each channel separately -&amp;gt; no channel product&lt;br /&gt;(3) &lt;b&gt;projection 1x1 conv&lt;/b&gt; : $H \times W \times t \cdot C_{in} \times C_{out} \times 1 \times 1$&lt;/p&gt;
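&lt;p data-ke-size=&quot;size16&quot;&gt;A sketch of the whole block total, again reusing conv_macs from above; note the depthwise term has no output-channel factor:&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;def depthwise_macs(h, w, c, k):
    # depthwise: each channel is convolved independently
    return h * w * c * k * k

def mobilenetv2_block_macs(h, w, c_in, c_out, t, k=3):
    expand = conv_macs(h, w, c_in, t * c_in, 1)    # 1x1 expansion
    dw = depthwise_macs(h, w, t * c_in, k)         # 3x3 depthwise
    project = conv_macs(h, w, t * c_in, c_out, 1)  # 1x1 projection
    return expand + dw + project

print(mobilenetv2_block_macs(32, 32, 32, 64, 6))  # 20643840&lt;/code&gt;&lt;/pre&gt;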
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;(Example)&lt;/h2&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Total MACs = $H \times W \times C_{in} \times C_{out} \times K \times K$&lt;/li&gt;
&lt;li&gt;Input feature map : $32 \times 32$&lt;/li&gt;
&lt;li&gt;Input channels : 32, output channels : 64, kernel : $3 \times 3$, expansion ratio : 6&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;1️⃣ &lt;span style=&quot;background-color: #dddddd;&quot;&gt;VGGNet&lt;/span&gt;&lt;/h3&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;Structure&lt;/b&gt;&lt;/h4&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;3&amp;times;3 conv&lt;/li&gt;
&lt;li&gt;3&amp;times;3 conv&lt;/li&gt;
&lt;/ul&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;MACs&lt;/b&gt;&lt;/h4&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;$32 \times 32 \times 32 \times 64 \times 3 \times 3 = 18,874,368$&lt;/li&gt;
&lt;/ul&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;Parameters&lt;/b&gt;&lt;/h4&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;$32 \times 64 \times 3 \times 3 = 18,432$&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;2️⃣ &lt;span style=&quot;background-color: #dddddd;&quot;&gt;ResNet Basic block&lt;/span&gt;&lt;/h3&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;Structure&lt;/b&gt;&lt;/h4&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;3&amp;times;3 conv&lt;/li&gt;
&lt;li&gt;3&amp;times;3 conv&lt;/li&gt;
&lt;li&gt;identity shortcut (no computation)&lt;/li&gt;
&lt;/ul&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;MACs&lt;/b&gt;&lt;/h4&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;First conv = $32 \times 32 \times 32 \times 64 \times 3 \times 3 = 18,874,368$&lt;/li&gt;
&lt;li&gt;Second conv = $32 \times 32 \times 64 \times 64 \times 3 \times 3 = 37,748,736$&lt;/li&gt;
&lt;/ol&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\therefore$ MACs = $56,623,104$&lt;/p&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;Parameters&lt;/b&gt;&lt;/h4&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;First conv = $32 \times 64 \times 3 \times 3 = 18,432$&lt;/li&gt;
&lt;li&gt;Second conv = $64 \times 64 \times 3 \times 3 = 36,864$&lt;/li&gt;
&lt;/ol&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\therefore$ Parameters = $55,296$&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;3️⃣ &lt;span style=&quot;background-color: #dddddd;&quot;&gt;ResNet Bottleneck block&lt;/span&gt;&lt;/h3&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;Structure&lt;/b&gt;&lt;/h4&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;1x1 conv : reduce channels&lt;/li&gt;
&lt;li&gt;3&amp;times;3 conv&lt;/li&gt;
&lt;li&gt;1x1 conv : restore channels&lt;/li&gt;
&lt;li&gt;1x1 projection shortcut&lt;/li&gt;
&lt;li&gt;bottleneck mid channels = 16 (usually set to about 1/4 of the output channels)&lt;/li&gt;
&lt;/ul&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;MACs&lt;/b&gt;&lt;/h4&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;First conv = $32 \times 32 \times 32 \times 16 \times 1 \times 1 = 524,288$&lt;/li&gt;
&lt;li&gt;Second conv = $32 \times 32 \times 16 \times 16 \times 3 \times 3 = 2,359,296$&lt;/li&gt;
&lt;li&gt;Third conv = $32 \times 32 \times 16 \times 64 \times 1 \times 1 = 1,048,576$&lt;/li&gt;
&lt;li&gt;Shortcut $1 \times 1$ conv = $32 \times 32 \times 32 \times 64 \times 1 \times 1 = 2,097,152$&lt;/li&gt;
&lt;/ol&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\therefore$ MACs = $6,029,312$&lt;/p&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;Parameters&lt;/b&gt;&lt;/h4&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;First conv = $32 \times 16 \times 1 \times 1 = 512$&lt;/li&gt;
&lt;li&gt;Second conv = $16 \times 16 \times 3 \times 3 = 2,304$&lt;/li&gt;
&lt;li&gt;Third conv = $16 \times 64 \times 1 \times 1 = 1,024$&lt;/li&gt;
&lt;li&gt;Shortcut $1 \times 1$ conv = $32 \times 64 \times 1 \times 1 = 2,048$&lt;/li&gt;
&lt;/ol&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\therefore$ Parameters = $5,888$&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;4️⃣ &lt;span style=&quot;background-color: #dddddd;&quot;&gt;MobileNetV2 Block&lt;/span&gt;&lt;/h3&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;Structure&lt;/b&gt;&lt;/h4&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;1x1 expansion (32 -&amp;gt; 192)&lt;/li&gt;
&lt;li&gt;3&amp;times;3 depthwise (192 channels)&lt;/li&gt;
&lt;li&gt;1x1 projection (192 -&amp;gt; 64)&lt;/li&gt;
&lt;/ul&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;MACs&lt;/b&gt;&lt;/h4&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;expansion 1x1 conv = $32 \times 32 \times 32 \times 192 = 6,291,456$&lt;/li&gt;
&lt;li&gt;depthwise 3x3 conv = $32 \times 32 \times 192 \times 3 \times 3 = 1,769,472$&lt;/li&gt;
&lt;li&gt;projection 1x1 conv = $32 \times 32 \times 192 \times 64 = 12,582,912$&lt;/li&gt;
&lt;/ol&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\therefore$ MACs = $20,643,840$&lt;/p&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;Parameters&lt;/b&gt;&lt;/h4&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;expansion 1x1 conv = $32 \times 192 = 6,144$&lt;/li&gt;
&lt;li&gt;depthwise 3x3 conv = $192 \times 3 \times 3 = 1,728$&lt;/li&gt;
&lt;li&gt;projection 1x1 conv = $192 \times 64 = 12,288$&lt;/li&gt;
&lt;/ol&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\therefore$ Parameters = $20,160$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;** Final Comparison Table&lt;/span&gt;&lt;/h3&gt;
&lt;table style=&quot;border-collapse: collapse; width: 80.4651%;&quot; border=&quot;1&quot; data-ke-align=&quot;alignLeft&quot; data-ke-style=&quot;style12&quot;&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td style=&quot;width: 24.6512%;&quot;&gt;&lt;span&gt;Block&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 13.2558%;&quot;&gt;&lt;span&gt;Params&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 15.2326%;&quot;&gt;&lt;span&gt;MACs&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 27.3255%;&quot;&gt;&lt;span&gt;Notes&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&quot;width: 24.6512%;&quot;&gt;&lt;span&gt;VGG (one 3&amp;times;3 conv)&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 13.2558%;&quot;&gt;&lt;span&gt;18,432&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 15.2326%;&quot;&gt;&lt;span&gt;18,874,368&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 27.3255%;&quot;&gt;&lt;span&gt;one standard conv&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&quot;width: 24.6512%;&quot;&gt;&lt;span&gt;ResNet Basic Block&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 13.2558%;&quot;&gt;&lt;span&gt;55,296&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 15.2326%;&quot;&gt;&lt;span&gt;56,623,104&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 27.3255%;&quot;&gt;&lt;span&gt;two 3&amp;times;3 convs&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&quot;width: 24.6512%;&quot;&gt;&lt;span&gt;ResNet Bottleneck Block&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 13.2558%;&quot;&gt;&lt;span&gt;5,888&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 15.2326%;&quot;&gt;&lt;span&gt;6,029,312&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 27.3255%;&quot;&gt;&lt;span&gt;1&amp;times;1&amp;rarr;3&amp;times;3&amp;rarr;1&amp;times;1 + projection&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td style=&quot;width: 24.6512%;&quot;&gt;&lt;span&gt;MobileNetV2 Block&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 13.2558%;&quot;&gt;&lt;span&gt;20,160&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 15.2326%;&quot;&gt;&lt;span&gt;20,643,840&lt;/span&gt;&lt;/td&gt;
&lt;td style=&quot;width: 27.3255%;&quot;&gt;&lt;span&gt;expand&amp;rarr;depthwise&amp;rarr;project&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
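&lt;p data-ke-size=&quot;size16&quot;&gt;The Params column can be cross-checked directly in PyTorch (a sketch; bias and BatchNorm parameters are left out here to match the formulas above):&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;import torch.nn as nn

def params(m):
    return sum(p.numel() for p in m.parameters())

vgg = nn.Conv2d(32, 64, 3, padding=1, bias=False)
bottleneck = nn.ModuleList([
    nn.Conv2d(32, 16, 1, bias=False),                       # 1x1 reduce
    nn.Conv2d(16, 16, 3, padding=1, bias=False),            # 3x3
    nn.Conv2d(16, 64, 1, bias=False),                       # 1x1 restore
    nn.Conv2d(32, 64, 1, bias=False),                       # projection shortcut
])
mbv2 = nn.ModuleList([
    nn.Conv2d(32, 192, 1, bias=False),                      # 1x1 expansion
    nn.Conv2d(192, 192, 3, padding=1, groups=192, bias=False),  # 3x3 depthwise
    nn.Conv2d(192, 64, 1, bias=False),                      # 1x1 projection
])
print(params(vgg), params(bottleneck), params(mbv2))
# 18432 5888 20160&lt;/code&gt;&lt;/pre&gt;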
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;*** These are per-block counts. The computation for the full network will be worked out in the next post.&lt;/p&gt;</description>
      <category>AI/Paper Review</category>
      <author>우연입니다</author>
      <guid isPermaLink="true">https://orchidbyw1.tistory.com/10</guid>
      <comments>https://orchidbyw1.tistory.com/10#entry10comment</comments>
      <pubDate>Sat, 4 Apr 2026 20:10:44 +0900</pubDate>
    </item>
    <item>
      <title>[Paper Review] MobileNetV2 Implementation</title>
      <link>https://orchidbyw1.tistory.com/9</link>
      <description>&lt;blockquote data-ke-style=&quot;style3&quot;&gt;After reading the MobileNetV2 paper, I implemented and trained the model myself for a CIFAR-10 classification experiment.&lt;br /&gt;Paper summary: &lt;a href=&quot;https://orchidbyw1.tistory.com/6&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot;&gt;https://orchidbyw1.tistory.com/6&lt;/a&gt;&lt;/blockquote&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Model Setup&lt;/span&gt;&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Code base: based on kuangliu/pytorch-cifar&lt;/li&gt;
&lt;li&gt;Dataset: CIFAR-10&lt;/li&gt;
&lt;li&gt;Model: MobileNetV2
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Uses Inverted Residual Blocks&lt;/li&gt;
&lt;li&gt;Applies Depthwise Separable Convolution&lt;/li&gt;
&lt;li&gt;width multiplier=1.2&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Unlike conventional CNNs, MobileNetV2 uses an &lt;b&gt;inverted residual structure that expands the channels and then projects them back down&lt;/b&gt;&lt;span&gt;.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;The initial convolution was also set to a &lt;b&gt;3&amp;times;3 conv with stride=1&lt;/b&gt;&lt;span&gt; to fit the CIFAR-10 input size (32&amp;times;32), instead of the stride=2 used in the ImageNet version.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;In addition, a width multiplier scales each stage's channel count by a fixed ratio, as sketched below.&lt;/span&gt;&lt;/p&gt;
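&lt;p data-ke-size=&quot;size16&quot;&gt;A small sketch of that scaling with width_mult = 1.2, reusing the make_divisible helper defined in the full code below:&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;import numpy as np

def make_divisible(x, divisible_by=8):
    # round up so channel counts stay divisible by 8
    return int(np.ceil(x * 1. / divisible_by) * divisible_by)

width_mult = 1.2
for c in [16, 24, 32, 64, 96, 160, 320]:
    print(c, make_divisible(c * width_mult))
# 16 -&amp;gt; 24, 24 -&amp;gt; 32, 32 -&amp;gt; 40, 64 -&amp;gt; 80, 96 -&amp;gt; 120, 160 -&amp;gt; 192, 320 -&amp;gt; 384&lt;/code&gt;&lt;/pre&gt;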
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Training Setup&lt;/span&gt;&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Epoch: 200&lt;/li&gt;
&lt;li&gt;Batch size: 128&lt;/li&gt;
&lt;li&gt;Optimizer: SGD&lt;/li&gt;
&lt;li&gt;Learning rate: 0.1&lt;/li&gt;
&lt;li&gt;Momentum: 0.9&lt;/li&gt;
&lt;li&gt;Weight decay: 5e-4&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Training followed the settings commonly used for CIFAR-10 classification,&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;in particular SGD with momentum, plus weight decay, so that training stays stable.&lt;/p&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Data Preprocessing&lt;/span&gt;&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;RandomCrop(32, padding=4)&lt;/li&gt;
&lt;li&gt;RandomHorizontalFlip&lt;/li&gt;
&lt;li&gt;Normalize: (0.4914, 0.4822, 0.4465) / (0.2023, 0.1994, 0.2010)&lt;/li&gt;
&lt;/ul&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-style=&quot;style6&quot; data-ke-type=&quot;horizontalRule&quot; /&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;Model Implementation&lt;/blockquote&gt;
&lt;pre id=&quot;code_1774776757771&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;import torch
import torch.nn as nn
import torch.nn.functional as F

def make_divisible(x, divisible_by=8): 
  # round x up so it is divisible by 8
  import numpy as np
  return int(np.ceil(x * 1. / divisible_by) * divisible_by)

class Block(nn.Module): # inherits from nn.Module

  def __init__(self, in_planes, out_planes, expansion, stride):
    super(Block, self).__init__() # initialize nn.Module first

    # store the stride
    self.stride = stride

    # inverted residual shortcut
    # use the shortcut only when stride is 1 and the input and output channel counts match
    if stride == 1 and in_planes == out_planes:
      self.use_residual = True
    else:
      self.use_residual = False

    # expanded intermediate channel count
    planes = expansion * in_planes

    # 1x1 conv (expand) : Narrow -&amp;gt; Wide
    self.conv1 = nn.Conv2d(
        in_planes, # input channels
        planes, # output channels (= expanded channels)
        kernel_size=1, # 1x1 conv
        stride=1,
        padding=0,
        bias=False
    )
    self.bn1 = nn.BatchNorm2d(planes)

    # 3x3 depthwise conv : stays Wide
    self.conv2 = nn.Conv2d(
        planes, # input channels
        planes, # output channels (= input channels)
        kernel_size=3, # 3x3 depthwise conv
        stride=stride,
        padding=1,
        groups=planes, # depthwise convolution: each channel gets its own independent 3x3 conv
        bias=False
    )
    self.bn2 = nn.BatchNorm2d(planes)

    # 1x1 conv (projection) : Wide -&amp;gt; Narrow
    self.conv3 = nn.Conv2d(
        planes, # input channels
        out_planes, # output channels
        kernel_size=1, # 1x1 conv
        stride=1,
        padding=0,
        bias=False
    )
    self.bn3 = nn.BatchNorm2d(out_planes)

  def forward(self, x): # block forward pass
    out = self.conv1(x)
    out = self.bn1(out)
    out = F.relu6(out)

    out = self.conv2(out)
    out = self.bn2(out)
    out = F.relu6(out)

    out = self.conv3(out)
    out = self.bn3(out)

    # add the residual only when stride=1 and input/output channels match
    if self.use_residual:
      out = out + x

    return out

class MobileNetV2(nn.Module):
  # the full MobileNetV2 network structure
  # (expansion, out_planes, num_blocks, stride)
  cfg = [
      (1,  16, 1, 1),
      (6,  24, 2, 1), # NOTE: change stride 2 -&amp;gt; 1 for CIFAR10
      (6,  32, 3, 2),
      (6,  64, 4, 2),
      (6,  96, 3, 1),
      (6, 160, 3, 2),
      (6, 320, 1, 1)
  ]

  def __init__(self, num_classes=10, width_mult=1.):
    super(MobileNetV2, self).__init__()

    self.width_mult = width_mult

    # final output channels
    last_channel = 1280
    self.last_channel = make_divisible(last_channel * width_mult) if width_mult &amp;gt; 1.0 else last_channel # apply the width multiplier

    # first conv
    self.conv1 = nn.Conv2d(
        3, # RGB input
        32, # first output channel count
        kernel_size=3,
        stride=1, # NOTE: change conv1 stride 2 -&amp;gt; 1 for CIFAR10
        padding=1,
        bias=False
    )
    self.bn1 = nn.BatchNorm2d(32)

    # stack the bottleneck blocks in order
    self.layers = self._make_layers(in_planes=32)
    final_planes = make_divisible(320 * self.width_mult)

    # final 1x1 conv
    self.conv2 = nn.Conv2d(
        final_planes,
        self.last_channel,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=False
    )
    self.bn2 = nn.BatchNorm2d(self.last_channel)

    # final classifier
    self.linear = nn.Linear(self.last_channel, num_classes)

  def _make_layers(self, in_planes):
    # read cfg and build the bottleneck blocks in order
    layers = []

    # read each stage's configuration
    for expansion, out_planes, num_blocks, stride in self.cfg:
        # only the first block uses the given stride
        # the remaining blocks use stride=1
        # e.g. num_blocks=3, stride=2 gives blocks with strides [2, 1, 1]
        strides = [stride] + [1] * (num_blocks - 1)

        # apply the width multiplier to every out_planes
        output_planes = make_divisible(out_planes * self.width_mult)

        for stride in strides:
            # append one block
            layers.append(Block(in_planes, output_planes, expansion, stride))

            # the next block's input channels are the current output channels
            in_planes = output_planes

    # return the stacked blocks
    return nn.Sequential(*layers)

  def forward(self, x):
    # first conv + BN + ReLU6
    out = F.relu6(self.bn1(self.conv1(x)))

    # pass through the bottleneck blocks
    out = self.layers(out)

    # final 1x1 conv + BN + ReLU6
    out = F.relu6(self.bn2(self.conv2(out)))

    out = F.avg_pool2d(out, 4) # NOTE: change pooling kernel_size 7 -&amp;gt; 4 for CIFAR10
    out = out.view(out.size(0), -1) # flatten
    out = self.linear(out) # fully connected

    return out&lt;/code&gt;&lt;/pre&gt;
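&lt;p data-ke-size=&quot;size16&quot;&gt;A quick sanity check of the model above (a sketch; the exact parameter count depends on width_mult, so it is printed rather than quoted):&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;net = MobileNetV2(num_classes=10, width_mult=1.2)
x = torch.randn(1, 3, 32, 32)  # one CIFAR-10-sized input
print(net(x).shape)  # torch.Size([1, 10])
print(sum(p.numel() for p in net.parameters()))  # total parameter count&lt;/code&gt;&lt;/pre&gt;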
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;Training and Testing&lt;/blockquote&gt;
&lt;pre id=&quot;code_1774776757774&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.backends.cudnn as cudnn

# ----- basic setup -----
device = 'cuda' if torch.cuda.is_available() else 'cpu' # device setup: use the GPU if available

best_acc = 0  # best test accuracy
num_epochs = 200 # number of epochs

# ----- data preprocessing -----
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4), # random crop
    transforms.RandomHorizontalFlip(), # random horizontal flip
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]) # train -&amp;gt; augment for more varied training
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]) # test -&amp;gt; only tensor conversion and normalization for evaluation

# ----- datasets -----
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128, # train on 128 images per batch
    shuffle=True, # shuffle for training
    num_workers=2
    )

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=100, # 100 images per test batch
    shuffle=False, # no need to shuffle for testing
    num_workers=2
    )

# ----- model setup -----
net = MobileNetV2(width_mult=1.2)
net = net.to(device)
if device == 'cuda':
    net = torch.nn.DataParallel(net) # data-parallel across available GPUs
    cudnn.benchmark = True

# ----- loss / optimizer / scheduler -----
criterion = nn.CrossEntropyLoss() # loss function for classification
optimizer = optim.SGD( # SGD optimizer settings
    net.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4
    )
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs) # learning-rate schedule -&amp;gt; large steps early, fine adjustments later

# ----- training -----
def train():
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader): # iterate over training data batch by batch
        inputs, targets = inputs.to(device), targets.to(device) # move inputs and targets to the device
        optimizer.zero_grad() # reset previous gradients
        outputs = net(inputs) # forward propagation
        loss = criterion(outputs, targets) # compute loss
        loss.backward() # compute gradients
        optimizer.step() # update weights

        train_loss += loss.item() # accumulate loss
        _, predicted = outputs.max(1) # take the predicted class
        total += targets.size(0) # running accuracy bookkeeping
        correct += predicted.eq(targets).sum().item()

    avg_train_loss = train_loss / len(trainloader)
    train_acc = 100. * correct / total

    return avg_train_loss, train_acc

# ----- testing -----
def test():
    global best_acc # to track the best accuracy so far
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad(): # no gradients needed at test time, so disable them
        for batch_idx, (inputs, targets) in enumerate(testloader): # iterate over test data batch by batch
            inputs, targets = inputs.to(device), targets.to(device) # move inputs and targets to the device
            outputs = net(inputs) # no training, prediction only
            loss = criterion(outputs, targets) # compute loss

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0) # running test-accuracy bookkeeping
            correct += predicted.eq(targets).sum().item()

    avg_test_loss = test_loss / len(testloader)
    test_acc = 100. * correct / total

    # update best_acc if the current test accuracy beats the best so far
    if test_acc &amp;gt; best_acc:
        best_acc = test_acc

    return avg_test_loss, test_acc

# ----- main loop -----
print(&quot;----- Start Training -----&quot;)
for epoch in range(num_epochs):
    train_loss, train_acc = train()
    test_loss, test_acc = test()
    scheduler.step()

    current_lr = optimizer.param_groups[0]['lr']

    print(f&quot;Epoch [{epoch+1}/{num_epochs}] | &quot;
          f&quot;LR: {current_lr:.5f} | &quot;
          f&quot;Train Loss: {train_loss:.4f} | &quot;
          f&quot;Train Acc: {train_acc:.2f}% | &quot;
          f&quot;Test Loss: {test_loss:.4f} | &quot;
          f&quot;Test Acc: {test_acc:.2f}% | &quot;
          f&quot;Best Acc: {best_acc:.2f}%&quot;)

# ----- final output -----
print(&quot;----- Finished Training -----&quot;)
print(&quot;Best Accuracy:&quot;, best_acc)&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote style=&quot;color: #666666; text-align: left;&quot; data-ke-style=&quot;style2&quot;&gt;Results&lt;br /&gt;Best Accuracy: 92.87&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;This experiment used MobileNetV2 with &lt;b&gt;&lt;span&gt;width_mult = 1.2&lt;/span&gt;&lt;/b&gt;,&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;and expanding the channel counts beyond the base structure gave a final accuracy of 92.87%.&lt;/p&gt;
&lt;pre id=&quot;code_1774776757777&quot; class=&quot;python&quot; style=&quot;background-color: #f8f8f8; color: #383a42;&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;100%|██████████| 170M/170M [00:13&amp;lt;00:00, 12.6MB/s]
----- Start Training -----
Epoch [1/200] | LR: 0.09999 | Train Loss: 2.1046 | Train Acc: 27.17% | Test Loss: 1.6292 | Test Acc: 39.22% | Best Acc: 39.22%
Epoch [2/200] | LR: 0.09998 | Train Loss: 1.4312 | Train Acc: 47.30% | Test Loss: 1.3658 | Test Acc: 50.71% | Best Acc: 50.71%
Epoch [3/200] | LR: 0.09994 | Train Loss: 1.1455 | Train Acc: 58.88% | Test Loss: 1.9070 | Test Acc: 45.32% | Best Acc: 50.71%
Epoch [4/200] | LR: 0.09990 | Train Loss: 0.9709 | Train Acc: 65.23% | Test Loss: 0.8767 | Test Acc: 69.35% | Best Acc: 69.35%
Epoch [5/200] | LR: 0.09985 | Train Loss: 0.8380 | Train Acc: 70.58% | Test Loss: 0.9978 | Test Acc: 66.31% | Best Acc: 69.35%
Epoch [6/200] | LR: 0.09978 | Train Loss: 0.7596 | Train Acc: 73.47% | Test Loss: 0.9901 | Test Acc: 66.84% | Best Acc: 69.35%
Epoch [7/200] | LR: 0.09970 | Train Loss: 0.7007 | Train Acc: 75.68% | Test Loss: 0.7673 | Test Acc: 73.69% | Best Acc: 73.69%
Epoch [8/200] | LR: 0.09961 | Train Loss: 0.6703 | Train Acc: 76.85% | Test Loss: 0.7101 | Test Acc: 76.27% | Best Acc: 76.27%
Epoch [9/200] | LR: 0.09950 | Train Loss: 0.6504 | Train Acc: 77.55% | Test Loss: 0.7601 | Test Acc: 74.43% | Best Acc: 76.27%
Epoch [10/200] | LR: 0.09938 | Train Loss: 0.6311 | Train Acc: 78.32% | Test Loss: 0.8895 | Test Acc: 69.54% | Best Acc: 76.27%
Epoch [11/200] | LR: 0.09926 | Train Loss: 0.6287 | Train Acc: 78.42% | Test Loss: 1.1790 | Test Acc: 65.21% | Best Acc: 76.27%
Epoch [12/200] | LR: 0.09911 | Train Loss: 0.6170 | Train Acc: 78.80% | Test Loss: 0.9179 | Test Acc: 69.47% | Best Acc: 76.27%
Epoch [13/200] | LR: 0.09896 | Train Loss: 0.6147 | Train Acc: 79.06% | Test Loss: 0.7708 | Test Acc: 74.44% | Best Acc: 76.27%
Epoch [14/200] | LR: 0.09880 | Train Loss: 0.6095 | Train Acc: 79.13% | Test Loss: 0.8485 | Test Acc: 72.15% | Best Acc: 76.27%
Epoch [15/200] | LR: 0.09862 | Train Loss: 0.5999 | Train Acc: 79.39% | Test Loss: 0.6911 | Test Acc: 76.97% | Best Acc: 76.97%
Epoch [16/200] | LR: 0.09843 | Train Loss: 0.5934 | Train Acc: 79.39% | Test Loss: 0.7168 | Test Acc: 76.12% | Best Acc: 76.97%
Epoch [17/200] | LR: 0.09823 | Train Loss: 0.5962 | Train Acc: 79.49% | Test Loss: 0.7701 | Test Acc: 74.62% | Best Acc: 76.97%
Epoch [18/200] | LR: 0.09801 | Train Loss: 0.5833 | Train Acc: 80.20% | Test Loss: 0.6593 | Test Acc: 77.30% | Best Acc: 77.30%
Epoch [19/200] | LR: 0.09779 | Train Loss: 0.5817 | Train Acc: 80.11% | Test Loss: 0.7992 | Test Acc: 72.61% | Best Acc: 77.30%
Epoch [20/200] | LR: 0.09755 | Train Loss: 0.5840 | Train Acc: 79.99% | Test Loss: 0.8392 | Test Acc: 71.88% | Best Acc: 77.30%
Epoch [21/200] | LR: 0.09730 | Train Loss: 0.5830 | Train Acc: 80.01% | Test Loss: 0.8496 | Test Acc: 72.77% | Best Acc: 77.30%
Epoch [22/200] | LR: 0.09704 | Train Loss: 0.5865 | Train Acc: 79.90% | Test Loss: 0.6389 | Test Acc: 78.96% | Best Acc: 78.96%
Epoch [23/200] | LR: 0.09677 | Train Loss: 0.5771 | Train Acc: 80.09% | Test Loss: 0.6735 | Test Acc: 76.81% | Best Acc: 78.96%
Epoch [24/200] | LR: 0.09649 | Train Loss: 0.5645 | Train Acc: 80.62% | Test Loss: 0.7584 | Test Acc: 73.39% | Best Acc: 78.96%
Epoch [25/200] | LR: 0.09619 | Train Loss: 0.5759 | Train Acc: 80.14% | Test Loss: 0.7221 | Test Acc: 75.47% | Best Acc: 78.96%
Epoch [26/200] | LR: 0.09589 | Train Loss: 0.5662 | Train Acc: 80.56% | Test Loss: 0.7672 | Test Acc: 74.68% | Best Acc: 78.96%
Epoch [27/200] | LR: 0.09557 | Train Loss: 0.5709 | Train Acc: 80.29% | Test Loss: 0.7213 | Test Acc: 76.39% | Best Acc: 78.96%
Epoch [28/200] | LR: 0.09524 | Train Loss: 0.5673 | Train Acc: 80.64% | Test Loss: 0.7523 | Test Acc: 74.51% | Best Acc: 78.96%
Epoch [29/200] | LR: 0.09490 | Train Loss: 0.5594 | Train Acc: 80.77% | Test Loss: 0.6931 | Test Acc: 76.36% | Best Acc: 78.96%
Epoch [30/200] | LR: 0.09455 | Train Loss: 0.5646 | Train Acc: 80.66% | Test Loss: 0.6109 | Test Acc: 78.69% | Best Acc: 78.96%
Epoch [31/200] | LR: 0.09419 | Train Loss: 0.5557 | Train Acc: 80.90% | Test Loss: 0.7545 | Test Acc: 74.40% | Best Acc: 78.96%
Epoch [32/200] | LR: 0.09382 | Train Loss: 0.5583 | Train Acc: 80.88% | Test Loss: 0.7263 | Test Acc: 75.09% | Best Acc: 78.96%
Epoch [33/200] | LR: 0.09343 | Train Loss: 0.5566 | Train Acc: 80.87% | Test Loss: 0.7381 | Test Acc: 74.57% | Best Acc: 78.96%
Epoch [34/200] | LR: 0.09304 | Train Loss: 0.5561 | Train Acc: 80.87% | Test Loss: 0.7095 | Test Acc: 75.80% | Best Acc: 78.96%
Epoch [35/200] | LR: 0.09263 | Train Loss: 0.5550 | Train Acc: 80.89% | Test Loss: 0.8686 | Test Acc: 71.42% | Best Acc: 78.96%
Epoch [36/200] | LR: 0.09222 | Train Loss: 0.5554 | Train Acc: 80.99% | Test Loss: 0.6206 | Test Acc: 78.71% | Best Acc: 78.96%
Epoch [37/200] | LR: 0.09179 | Train Loss: 0.5489 | Train Acc: 81.06% | Test Loss: 0.6612 | Test Acc: 77.44% | Best Acc: 78.96%
Epoch [38/200] | LR: 0.09135 | Train Loss: 0.5544 | Train Acc: 81.02% | Test Loss: 0.8496 | Test Acc: 73.57% | Best Acc: 78.96%
Epoch [39/200] | LR: 0.09091 | Train Loss: 0.5486 | Train Acc: 81.08% | Test Loss: 0.6201 | Test Acc: 78.97% | Best Acc: 78.97%
Epoch [40/200] | LR: 0.09045 | Train Loss: 0.5413 | Train Acc: 81.41% | Test Loss: 1.0010 | Test Acc: 68.07% | Best Acc: 78.97%
Epoch [41/200] | LR: 0.08998 | Train Loss: 0.5522 | Train Acc: 81.21% | Test Loss: 0.6933 | Test Acc: 76.10% | Best Acc: 78.97%
Epoch [42/200] | LR: 0.08951 | Train Loss: 0.5469 | Train Acc: 81.08% | Test Loss: 0.6160 | Test Acc: 79.28% | Best Acc: 79.28%
Epoch [43/200] | LR: 0.08902 | Train Loss: 0.5460 | Train Acc: 81.26% | Test Loss: 0.8246 | Test Acc: 71.64% | Best Acc: 79.28%
Epoch [44/200] | LR: 0.08853 | Train Loss: 0.5485 | Train Acc: 81.02% | Test Loss: 0.9418 | Test Acc: 69.42% | Best Acc: 79.28%
Epoch [45/200] | LR: 0.08802 | Train Loss: 0.5509 | Train Acc: 81.24% | Test Loss: 0.6220 | Test Acc: 78.66% | Best Acc: 79.28%
Epoch [46/200] | LR: 0.08751 | Train Loss: 0.5492 | Train Acc: 81.26% | Test Loss: 0.5912 | Test Acc: 80.06% | Best Acc: 80.06%
Epoch [47/200] | LR: 0.08698 | Train Loss: 0.5425 | Train Acc: 81.44% | Test Loss: 0.7106 | Test Acc: 76.02% | Best Acc: 80.06%
Epoch [48/200] | LR: 0.08645 | Train Loss: 0.5419 | Train Acc: 81.44% | Test Loss: 0.5880 | Test Acc: 79.98% | Best Acc: 80.06%
Epoch [49/200] | LR: 0.08591 | Train Loss: 0.5408 | Train Acc: 81.38% | Test Loss: 0.6966 | Test Acc: 76.50% | Best Acc: 80.06%
Epoch [50/200] | LR: 0.08536 | Train Loss: 0.5500 | Train Acc: 81.05% | Test Loss: 1.4030 | Test Acc: 59.96% | Best Acc: 80.06%
Epoch [51/200] | LR: 0.08480 | Train Loss: 0.5450 | Train Acc: 81.26% | Test Loss: 0.6372 | Test Acc: 78.16% | Best Acc: 80.06%
Epoch [52/200] | LR: 0.08423 | Train Loss: 0.5404 | Train Acc: 81.46% | Test Loss: 0.6160 | Test Acc: 78.61% | Best Acc: 80.06%
Epoch [53/200] | LR: 0.08365 | Train Loss: 0.5391 | Train Acc: 81.62% | Test Loss: 0.5845 | Test Acc: 80.05% | Best Acc: 80.06%
Epoch [54/200] | LR: 0.08307 | Train Loss: 0.5350 | Train Acc: 81.78% | Test Loss: 0.7556 | Test Acc: 75.14% | Best Acc: 80.06%
Epoch [55/200] | LR: 0.08247 | Train Loss: 0.5321 | Train Acc: 81.82% | Test Loss: 0.6838 | Test Acc: 77.21% | Best Acc: 80.06%
Epoch [56/200] | LR: 0.08187 | Train Loss: 0.5335 | Train Acc: 81.84% | Test Loss: 0.6078 | Test Acc: 79.26% | Best Acc: 80.06%
Epoch [57/200] | LR: 0.08126 | Train Loss: 0.5357 | Train Acc: 81.54% | Test Loss: 0.6456 | Test Acc: 77.54% | Best Acc: 80.06%
Epoch [58/200] | LR: 0.08065 | Train Loss: 0.5329 | Train Acc: 81.72% | Test Loss: 0.6962 | Test Acc: 76.57% | Best Acc: 80.06%
Epoch [59/200] | LR: 0.08002 | Train Loss: 0.5328 | Train Acc: 81.49% | Test Loss: 0.7766 | Test Acc: 74.36% | Best Acc: 80.06%
Epoch [60/200] | LR: 0.07939 | Train Loss: 0.5238 | Train Acc: 82.04% | Test Loss: 0.6449 | Test Acc: 77.72% | Best Acc: 80.06%
Epoch [61/200] | LR: 0.07875 | Train Loss: 0.5232 | Train Acc: 82.17% | Test Loss: 0.6394 | Test Acc: 78.61% | Best Acc: 80.06%
Epoch [62/200] | LR: 0.07810 | Train Loss: 0.5260 | Train Acc: 81.97% | Test Loss: 0.7067 | Test Acc: 76.03% | Best Acc: 80.06%
Epoch [63/200] | LR: 0.07745 | Train Loss: 0.5197 | Train Acc: 82.25% | Test Loss: 0.7388 | Test Acc: 75.05% | Best Acc: 80.06%
Epoch [64/200] | LR: 0.07679 | Train Loss: 0.5253 | Train Acc: 81.97% | Test Loss: 0.5825 | Test Acc: 79.63% | Best Acc: 80.06%
Epoch [65/200] | LR: 0.07612 | Train Loss: 0.5241 | Train Acc: 82.14% | Test Loss: 1.0174 | Test Acc: 69.13% | Best Acc: 80.06%
Epoch [66/200] | LR: 0.07545 | Train Loss: 0.5128 | Train Acc: 82.36% | Test Loss: 0.6088 | Test Acc: 80.01% | Best Acc: 80.06%
Epoch [67/200] | LR: 0.07477 | Train Loss: 0.5156 | Train Acc: 82.31% | Test Loss: 0.6500 | Test Acc: 78.27% | Best Acc: 80.06%
Epoch [68/200] | LR: 0.07409 | Train Loss: 0.5142 | Train Acc: 82.46% | Test Loss: 0.6106 | Test Acc: 79.32% | Best Acc: 80.06%
Epoch [69/200] | LR: 0.07340 | Train Loss: 0.5153 | Train Acc: 82.38% | Test Loss: 0.6865 | Test Acc: 76.62% | Best Acc: 80.06%
Epoch [70/200] | LR: 0.07270 | Train Loss: 0.5039 | Train Acc: 82.83% | Test Loss: 0.5980 | Test Acc: 79.04% | Best Acc: 80.06%
Epoch [71/200] | LR: 0.07200 | Train Loss: 0.5062 | Train Acc: 82.41% | Test Loss: 0.7023 | Test Acc: 75.98% | Best Acc: 80.06%
Epoch [72/200] | LR: 0.07129 | Train Loss: 0.5115 | Train Acc: 82.56% | Test Loss: 0.5639 | Test Acc: 80.29% | Best Acc: 80.29%
Epoch [73/200] | LR: 0.07058 | Train Loss: 0.5079 | Train Acc: 82.70% | Test Loss: 0.5437 | Test Acc: 81.57% | Best Acc: 81.57%
Epoch [74/200] | LR: 0.06986 | Train Loss: 0.5038 | Train Acc: 82.61% | Test Loss: 0.8000 | Test Acc: 73.95% | Best Acc: 81.57%
Epoch [75/200] | LR: 0.06913 | Train Loss: 0.4995 | Train Acc: 83.00% | Test Loss: 0.6195 | Test Acc: 79.16% | Best Acc: 81.57%
Epoch [76/200] | LR: 0.06841 | Train Loss: 0.4966 | Train Acc: 83.02% | Test Loss: 0.5831 | Test Acc: 80.28% | Best Acc: 81.57%
Epoch [77/200] | LR: 0.06767 | Train Loss: 0.5077 | Train Acc: 82.66% | Test Loss: 0.6209 | Test Acc: 79.13% | Best Acc: 81.57%
Epoch [78/200] | LR: 0.06694 | Train Loss: 0.4947 | Train Acc: 83.11% | Test Loss: 0.5711 | Test Acc: 80.52% | Best Acc: 81.57%
Epoch [79/200] | LR: 0.06620 | Train Loss: 0.4976 | Train Acc: 82.82% | Test Loss: 0.6121 | Test Acc: 79.26% | Best Acc: 81.57%
Epoch [80/200] | LR: 0.06545 | Train Loss: 0.4971 | Train Acc: 82.85% | Test Loss: 0.6153 | Test Acc: 78.39% | Best Acc: 81.57%
Epoch [81/200] | LR: 0.06470 | Train Loss: 0.4890 | Train Acc: 83.29% | Test Loss: 0.5672 | Test Acc: 80.39% | Best Acc: 81.57%
Epoch [82/200] | LR: 0.06395 | Train Loss: 0.4908 | Train Acc: 83.20% | Test Loss: 0.5596 | Test Acc: 79.92% | Best Acc: 81.57%
Epoch [83/200] | LR: 0.06319 | Train Loss: 0.4913 | Train Acc: 83.22% | Test Loss: 0.7151 | Test Acc: 75.82% | Best Acc: 81.57%
Epoch [84/200] | LR: 0.06243 | Train Loss: 0.4902 | Train Acc: 83.32% | Test Loss: 0.6949 | Test Acc: 78.01% | Best Acc: 81.57%
Epoch [85/200] | LR: 0.06167 | Train Loss: 0.4864 | Train Acc: 83.30% | Test Loss: 0.8197 | Test Acc: 72.84% | Best Acc: 81.57%
Epoch [86/200] | LR: 0.06091 | Train Loss: 0.4825 | Train Acc: 83.49% | Test Loss: 0.5204 | Test Acc: 82.58% | Best Acc: 82.58%
Epoch [87/200] | LR: 0.06014 | Train Loss: 0.4851 | Train Acc: 83.48% | Test Loss: 0.5970 | Test Acc: 79.60% | Best Acc: 82.58%
Epoch [88/200] | LR: 0.05937 | Train Loss: 0.4707 | Train Acc: 83.93% | Test Loss: 0.5901 | Test Acc: 80.22% | Best Acc: 82.58%
Epoch [89/200] | LR: 0.05860 | Train Loss: 0.4779 | Train Acc: 83.65% | Test Loss: 0.5141 | Test Acc: 82.73% | Best Acc: 82.73%
Epoch [90/200] | LR: 0.05782 | Train Loss: 0.4751 | Train Acc: 83.63% | Test Loss: 0.6033 | Test Acc: 80.03% | Best Acc: 82.73%
Epoch [91/200] | LR: 0.05705 | Train Loss: 0.4763 | Train Acc: 83.62% | Test Loss: 0.5935 | Test Acc: 79.48% | Best Acc: 82.73%
Epoch [92/200] | LR: 0.05627 | Train Loss: 0.4657 | Train Acc: 84.00% | Test Loss: 0.5527 | Test Acc: 81.58% | Best Acc: 82.73%
Epoch [93/200] | LR: 0.05549 | Train Loss: 0.4665 | Train Acc: 84.00% | Test Loss: 0.5946 | Test Acc: 80.31% | Best Acc: 82.73%
Epoch [94/200] | LR: 0.05471 | Train Loss: 0.4664 | Train Acc: 84.09% | Test Loss: 0.5371 | Test Acc: 81.84% | Best Acc: 82.73%
Epoch [95/200] | LR: 0.05392 | Train Loss: 0.4583 | Train Acc: 84.32% | Test Loss: 0.4865 | Test Acc: 83.62% | Best Acc: 83.62%
Epoch [96/200] | LR: 0.05314 | Train Loss: 0.4576 | Train Acc: 84.30% | Test Loss: 0.5598 | Test Acc: 81.16% | Best Acc: 83.62%
Epoch [97/200] | LR: 0.05236 | Train Loss: 0.4538 | Train Acc: 84.30% | Test Loss: 0.5073 | Test Acc: 82.75% | Best Acc: 83.62%
Epoch [98/200] | LR: 0.05157 | Train Loss: 0.4458 | Train Acc: 84.60% | Test Loss: 0.5352 | Test Acc: 82.17% | Best Acc: 83.62%
Epoch [99/200] | LR: 0.05079 | Train Loss: 0.4562 | Train Acc: 84.47% | Test Loss: 0.5226 | Test Acc: 82.06% | Best Acc: 83.62%
Epoch [100/200] | LR: 0.05000 | Train Loss: 0.4442 | Train Acc: 84.84% | Test Loss: 0.5760 | Test Acc: 80.65% | Best Acc: 83.62%
Epoch [101/200] | LR: 0.04921 | Train Loss: 0.4491 | Train Acc: 84.53% | Test Loss: 0.5361 | Test Acc: 81.59% | Best Acc: 83.62%
Epoch [102/200] | LR: 0.04843 | Train Loss: 0.4424 | Train Acc: 84.83% | Test Loss: 0.5511 | Test Acc: 81.60% | Best Acc: 83.62%
Epoch [103/200] | LR: 0.04764 | Train Loss: 0.4326 | Train Acc: 85.07% | Test Loss: 0.5767 | Test Acc: 80.54% | Best Acc: 83.62%
Epoch [104/200] | LR: 0.04686 | Train Loss: 0.4405 | Train Acc: 84.82% | Test Loss: 0.5779 | Test Acc: 80.05% | Best Acc: 83.62%
Epoch [105/200] | LR: 0.04608 | Train Loss: 0.4338 | Train Acc: 85.05% | Test Loss: 0.5148 | Test Acc: 82.70% | Best Acc: 83.62%
Epoch [106/200] | LR: 0.04529 | Train Loss: 0.4328 | Train Acc: 85.14% | Test Loss: 0.4959 | Test Acc: 83.23% | Best Acc: 83.62%
Epoch [107/200] | LR: 0.04451 | Train Loss: 0.4290 | Train Acc: 85.33% | Test Loss: 0.4640 | Test Acc: 84.26% | Best Acc: 84.26%
Epoch [108/200] | LR: 0.04373 | Train Loss: 0.4218 | Train Acc: 85.54% | Test Loss: 0.4635 | Test Acc: 84.29% | Best Acc: 84.29%
Epoch [109/200] | LR: 0.04295 | Train Loss: 0.4134 | Train Acc: 85.85% | Test Loss: 0.6596 | Test Acc: 78.26% | Best Acc: 84.29%
Epoch [110/200] | LR: 0.04218 | Train Loss: 0.4210 | Train Acc: 85.48% | Test Loss: 0.4772 | Test Acc: 83.47% | Best Acc: 84.29%
Epoch [111/200] | LR: 0.04140 | Train Loss: 0.4206 | Train Acc: 85.63% | Test Loss: 0.5719 | Test Acc: 80.76% | Best Acc: 84.29%
Epoch [112/200] | LR: 0.04063 | Train Loss: 0.4077 | Train Acc: 86.03% | Test Loss: 0.5626 | Test Acc: 80.66% | Best Acc: 84.29%
Epoch [113/200] | LR: 0.03986 | Train Loss: 0.4072 | Train Acc: 86.05% | Test Loss: 0.5878 | Test Acc: 80.01% | Best Acc: 84.29%
Epoch [114/200] | LR: 0.03909 | Train Loss: 0.4052 | Train Acc: 86.03% | Test Loss: 0.5347 | Test Acc: 82.40% | Best Acc: 84.29%
Epoch [115/200] | LR: 0.03833 | Train Loss: 0.4070 | Train Acc: 85.99% | Test Loss: 0.4778 | Test Acc: 83.69% | Best Acc: 84.29%
Epoch [116/200] | LR: 0.03757 | Train Loss: 0.3995 | Train Acc: 86.27% | Test Loss: 0.4739 | Test Acc: 83.83% | Best Acc: 84.29%
Epoch [117/200] | LR: 0.03681 | Train Loss: 0.3991 | Train Acc: 86.19% | Test Loss: 0.5056 | Test Acc: 82.83% | Best Acc: 84.29%
Epoch [118/200] | LR: 0.03605 | Train Loss: 0.3862 | Train Acc: 86.80% | Test Loss: 0.4663 | Test Acc: 84.68% | Best Acc: 84.68%
Epoch [119/200] | LR: 0.03530 | Train Loss: 0.3884 | Train Acc: 86.65% | Test Loss: 0.5069 | Test Acc: 83.21% | Best Acc: 84.68%
Epoch [120/200] | LR: 0.03455 | Train Loss: 0.3820 | Train Acc: 86.90% | Test Loss: 0.4613 | Test Acc: 84.39% | Best Acc: 84.68%
Epoch [121/200] | LR: 0.03380 | Train Loss: 0.3747 | Train Acc: 87.13% | Test Loss: 0.4520 | Test Acc: 84.72% | Best Acc: 84.72%
Epoch [122/200] | LR: 0.03306 | Train Loss: 0.3692 | Train Acc: 87.32% | Test Loss: 0.4614 | Test Acc: 84.78% | Best Acc: 84.78%
Epoch [123/200] | LR: 0.03233 | Train Loss: 0.3733 | Train Acc: 87.05% | Test Loss: 0.4841 | Test Acc: 83.37% | Best Acc: 84.78%
Epoch [124/200] | LR: 0.03159 | Train Loss: 0.3692 | Train Acc: 87.36% | Test Loss: 0.5189 | Test Acc: 82.57% | Best Acc: 84.78%
Epoch [125/200] | LR: 0.03087 | Train Loss: 0.3608 | Train Acc: 87.73% | Test Loss: 0.4186 | Test Acc: 85.85% | Best Acc: 85.85%
Epoch [126/200] | LR: 0.03014 | Train Loss: 0.3651 | Train Acc: 87.40% | Test Loss: 0.4394 | Test Acc: 85.04% | Best Acc: 85.85%
Epoch [127/200] | LR: 0.02942 | Train Loss: 0.3509 | Train Acc: 87.98% | Test Loss: 0.3884 | Test Acc: 86.82% | Best Acc: 86.82%
Epoch [128/200] | LR: 0.02871 | Train Loss: 0.3552 | Train Acc: 87.73% | Test Loss: 0.4213 | Test Acc: 85.39% | Best Acc: 86.82%
Epoch [129/200] | LR: 0.02800 | Train Loss: 0.3499 | Train Acc: 88.08% | Test Loss: 0.4846 | Test Acc: 83.76% | Best Acc: 86.82%
Epoch [130/200] | LR: 0.02730 | Train Loss: 0.3487 | Train Acc: 88.08% | Test Loss: 0.4301 | Test Acc: 85.43% | Best Acc: 86.82%
Epoch [131/200] | LR: 0.02660 | Train Loss: 0.3414 | Train Acc: 88.22% | Test Loss: 0.4192 | Test Acc: 86.22% | Best Acc: 86.82%
Epoch [132/200] | LR: 0.02591 | Train Loss: 0.3362 | Train Acc: 88.30% | Test Loss: 0.4354 | Test Acc: 85.31% | Best Acc: 86.82%
Epoch [133/200] | LR: 0.02523 | Train Loss: 0.3328 | Train Acc: 88.76% | Test Loss: 0.4162 | Test Acc: 86.10% | Best Acc: 86.82%
Epoch [134/200] | LR: 0.02455 | Train Loss: 0.3317 | Train Acc: 88.55% | Test Loss: 0.4063 | Test Acc: 86.21% | Best Acc: 86.82%
Epoch [135/200] | LR: 0.02388 | Train Loss: 0.3249 | Train Acc: 88.80% | Test Loss: 0.4409 | Test Acc: 85.25% | Best Acc: 86.82%
Epoch [136/200] | LR: 0.02321 | Train Loss: 0.3169 | Train Acc: 88.96% | Test Loss: 0.3810 | Test Acc: 86.93% | Best Acc: 86.93%
Epoch [137/200] | LR: 0.02255 | Train Loss: 0.3157 | Train Acc: 89.18% | Test Loss: 0.4380 | Test Acc: 85.74% | Best Acc: 86.93%
Epoch [138/200] | LR: 0.02190 | Train Loss: 0.3086 | Train Acc: 89.33% | Test Loss: 0.3833 | Test Acc: 87.33% | Best Acc: 87.33%
Epoch [139/200] | LR: 0.02125 | Train Loss: 0.3058 | Train Acc: 89.40% | Test Loss: 0.3626 | Test Acc: 87.68% | Best Acc: 87.68%
Epoch [140/200] | LR: 0.02061 | Train Loss: 0.3030 | Train Acc: 89.59% | Test Loss: 0.3705 | Test Acc: 87.76% | Best Acc: 87.76%
Epoch [141/200] | LR: 0.01998 | Train Loss: 0.3003 | Train Acc: 89.52% | Test Loss: 0.3545 | Test Acc: 87.98% | Best Acc: 87.98%
Epoch [142/200] | LR: 0.01935 | Train Loss: 0.2933 | Train Acc: 89.85% | Test Loss: 0.3780 | Test Acc: 87.27% | Best Acc: 87.98%
Epoch [143/200] | LR: 0.01874 | Train Loss: 0.2867 | Train Acc: 89.99% | Test Loss: 0.3625 | Test Acc: 88.21% | Best Acc: 88.21%
Epoch [144/200] | LR: 0.01813 | Train Loss: 0.2858 | Train Acc: 90.05% | Test Loss: 0.3411 | Test Acc: 88.34% | Best Acc: 88.34%
Epoch [145/200] | LR: 0.01753 | Train Loss: 0.2809 | Train Acc: 90.46% | Test Loss: 0.3512 | Test Acc: 88.31% | Best Acc: 88.34%
Epoch [146/200] | LR: 0.01693 | Train Loss: 0.2747 | Train Acc: 90.60% | Test Loss: 0.3618 | Test Acc: 88.21% | Best Acc: 88.34%
Epoch [147/200] | LR: 0.01635 | Train Loss: 0.2715 | Train Acc: 90.61% | Test Loss: 0.3546 | Test Acc: 88.30% | Best Acc: 88.34%
Epoch [148/200] | LR: 0.01577 | Train Loss: 0.2639 | Train Acc: 90.98% | Test Loss: 0.3749 | Test Acc: 87.27% | Best Acc: 88.34%
Epoch [149/200] | LR: 0.01520 | Train Loss: 0.2624 | Train Acc: 91.01% | Test Loss: 0.3428 | Test Acc: 88.53% | Best Acc: 88.53%
Epoch [150/200] | LR: 0.01464 | Train Loss: 0.2546 | Train Acc: 91.19% | Test Loss: 0.3513 | Test Acc: 88.20% | Best Acc: 88.53%
Epoch [151/200] | LR: 0.01409 | Train Loss: 0.2538 | Train Acc: 91.20% | Test Loss: 0.3282 | Test Acc: 89.14% | Best Acc: 89.14%
Epoch [152/200] | LR: 0.01355 | Train Loss: 0.2422 | Train Acc: 91.68% | Test Loss: 0.3083 | Test Acc: 89.76% | Best Acc: 89.76%
Epoch [153/200] | LR: 0.01302 | Train Loss: 0.2421 | Train Acc: 91.67% | Test Loss: 0.2981 | Test Acc: 89.83% | Best Acc: 89.83%
Epoch [154/200] | LR: 0.01249 | Train Loss: 0.2368 | Train Acc: 91.73% | Test Loss: 0.3510 | Test Acc: 88.36% | Best Acc: 89.83%
Epoch [155/200] | LR: 0.01198 | Train Loss: 0.2330 | Train Acc: 92.10% | Test Loss: 0.3290 | Test Acc: 89.35% | Best Acc: 89.83%
Epoch [156/200] | LR: 0.01147 | Train Loss: 0.2293 | Train Acc: 91.98% | Test Loss: 0.3424 | Test Acc: 88.92% | Best Acc: 89.83%
Epoch [157/200] | LR: 0.01098 | Train Loss: 0.2197 | Train Acc: 92.42% | Test Loss: 0.3089 | Test Acc: 89.85% | Best Acc: 89.85%
Epoch [158/200] | LR: 0.01049 | Train Loss: 0.2147 | Train Acc: 92.62% | Test Loss: 0.3162 | Test Acc: 89.73% | Best Acc: 89.85%
Epoch [159/200] | LR: 0.01002 | Train Loss: 0.2071 | Train Acc: 92.86% | Test Loss: 0.3387 | Test Acc: 88.91% | Best Acc: 89.85%
Epoch [160/200] | LR: 0.00955 | Train Loss: 0.2111 | Train Acc: 92.60% | Test Loss: 0.3034 | Test Acc: 90.16% | Best Acc: 90.16%
Epoch [161/200] | LR: 0.00909 | Train Loss: 0.2044 | Train Acc: 92.86% | Test Loss: 0.3177 | Test Acc: 89.39% | Best Acc: 90.16%
Epoch [162/200] | LR: 0.00865 | Train Loss: 0.1947 | Train Acc: 93.32% | Test Loss: 0.3028 | Test Acc: 90.19% | Best Acc: 90.19%
Epoch [163/200] | LR: 0.00821 | Train Loss: 0.1836 | Train Acc: 93.65% | Test Loss: 0.2843 | Test Acc: 90.70% | Best Acc: 90.70%
Epoch [164/200] | LR: 0.00778 | Train Loss: 0.1777 | Train Acc: 93.88% | Test Loss: 0.2908 | Test Acc: 90.51% | Best Acc: 90.70%
Epoch [165/200] | LR: 0.00737 | Train Loss: 0.1730 | Train Acc: 93.97% | Test Loss: 0.2892 | Test Acc: 90.45% | Best Acc: 90.70%
Epoch [166/200] | LR: 0.00696 | Train Loss: 0.1702 | Train Acc: 94.11% | Test Loss: 0.2776 | Test Acc: 91.10% | Best Acc: 91.10%
Epoch [167/200] | LR: 0.00657 | Train Loss: 0.1649 | Train Acc: 94.33% | Test Loss: 0.2810 | Test Acc: 91.05% | Best Acc: 91.10%
Epoch [168/200] | LR: 0.00618 | Train Loss: 0.1582 | Train Acc: 94.56% | Test Loss: 0.2909 | Test Acc: 90.72% | Best Acc: 91.10%
Epoch [169/200] | LR: 0.00581 | Train Loss: 0.1500 | Train Acc: 94.86% | Test Loss: 0.2813 | Test Acc: 91.05% | Best Acc: 91.10%
Epoch [170/200] | LR: 0.00545 | Train Loss: 0.1467 | Train Acc: 94.87% | Test Loss: 0.2785 | Test Acc: 90.98% | Best Acc: 91.10%
Epoch [171/200] | LR: 0.00510 | Train Loss: 0.1454 | Train Acc: 94.92% | Test Loss: 0.2682 | Test Acc: 91.40% | Best Acc: 91.40%
Epoch [172/200] | LR: 0.00476 | Train Loss: 0.1376 | Train Acc: 95.34% | Test Loss: 0.2737 | Test Acc: 91.15% | Best Acc: 91.40%
Epoch [173/200] | LR: 0.00443 | Train Loss: 0.1317 | Train Acc: 95.47% | Test Loss: 0.2698 | Test Acc: 91.38% | Best Acc: 91.40%
Epoch [174/200] | LR: 0.00411 | Train Loss: 0.1254 | Train Acc: 95.64% | Test Loss: 0.2747 | Test Acc: 91.13% | Best Acc: 91.40%
Epoch [175/200] | LR: 0.00381 | Train Loss: 0.1167 | Train Acc: 95.96% | Test Loss: 0.2641 | Test Acc: 91.51% | Best Acc: 91.51%
Epoch [176/200] | LR: 0.00351 | Train Loss: 0.1126 | Train Acc: 96.13% | Test Loss: 0.2608 | Test Acc: 91.79% | Best Acc: 91.79%
Epoch [177/200] | LR: 0.00323 | Train Loss: 0.1060 | Train Acc: 96.30% | Test Loss: 0.2699 | Test Acc: 91.51% | Best Acc: 91.79%
Epoch [178/200] | LR: 0.00296 | Train Loss: 0.1011 | Train Acc: 96.59% | Test Loss: 0.2528 | Test Acc: 92.31% | Best Acc: 92.31%
Epoch [179/200] | LR: 0.00270 | Train Loss: 0.0980 | Train Acc: 96.69% | Test Loss: 0.2630 | Test Acc: 91.86% | Best Acc: 92.31%
Epoch [180/200] | LR: 0.00245 | Train Loss: 0.0924 | Train Acc: 96.89% | Test Loss: 0.2600 | Test Acc: 92.01% | Best Acc: 92.31%
Epoch [181/200] | LR: 0.00221 | Train Loss: 0.0877 | Train Acc: 97.02% | Test Loss: 0.2581 | Test Acc: 92.13% | Best Acc: 92.31%
Epoch [182/200] | LR: 0.00199 | Train Loss: 0.0821 | Train Acc: 97.18% | Test Loss: 0.2559 | Test Acc: 92.31% | Best Acc: 92.31%
Epoch [183/200] | LR: 0.00177 | Train Loss: 0.0762 | Train Acc: 97.48% | Test Loss: 0.2642 | Test Acc: 92.10% | Best Acc: 92.31%
Epoch [184/200] | LR: 0.00157 | Train Loss: 0.0774 | Train Acc: 97.42% | Test Loss: 0.2575 | Test Acc: 92.51% | Best Acc: 92.51%
Epoch [185/200] | LR: 0.00138 | Train Loss: 0.0691 | Train Acc: 97.75% | Test Loss: 0.2598 | Test Acc: 92.37% | Best Acc: 92.51%
Epoch [186/200] | LR: 0.00120 | Train Loss: 0.0656 | Train Acc: 97.81% | Test Loss: 0.2535 | Test Acc: 92.54% | Best Acc: 92.54%
Epoch [187/200] | LR: 0.00104 | Train Loss: 0.0658 | Train Acc: 97.84% | Test Loss: 0.2614 | Test Acc: 92.39% | Best Acc: 92.54%
Epoch [188/200] | LR: 0.00089 | Train Loss: 0.0596 | Train Acc: 98.06% | Test Loss: 0.2558 | Test Acc: 92.58% | Best Acc: 92.58%
Epoch [189/200] | LR: 0.00074 | Train Loss: 0.0565 | Train Acc: 98.19% | Test Loss: 0.2560 | Test Acc: 92.75% | Best Acc: 92.75%
Epoch [190/200] | LR: 0.00062 | Train Loss: 0.0575 | Train Acc: 98.15% | Test Loss: 0.2552 | Test Acc: 92.58% | Best Acc: 92.75%
Epoch [191/200] | LR: 0.00050 | Train Loss: 0.0545 | Train Acc: 98.31% | Test Loss: 0.2516 | Test Acc: 92.75% | Best Acc: 92.75%
Epoch [192/200] | LR: 0.00039 | Train Loss: 0.0527 | Train Acc: 98.31% | Test Loss: 0.2539 | Test Acc: 92.70% | Best Acc: 92.75%
Epoch [193/200] | LR: 0.00030 | Train Loss: 0.0521 | Train Acc: 98.37% | Test Loss: 0.2529 | Test Acc: 92.67% | Best Acc: 92.75%
Epoch [194/200] | LR: 0.00022 | Train Loss: 0.0484 | Train Acc: 98.55% | Test Loss: 0.2527 | Test Acc: 92.76% | Best Acc: 92.76%
Epoch [195/200] | LR: 0.00015 | Train Loss: 0.0490 | Train Acc: 98.52% | Test Loss: 0.2540 | Test Acc: 92.81% | Best Acc: 92.81%
Epoch [196/200] | LR: 0.00010 | Train Loss: 0.0480 | Train Acc: 98.54% | Test Loss: 0.2535 | Test Acc: 92.81% | Best Acc: 92.81%
Epoch [197/200] | LR: 0.00006 | Train Loss: 0.0461 | Train Acc: 98.59% | Test Loss: 0.2515 | Test Acc: 92.87% | Best Acc: 92.87%
Epoch [198/200] | LR: 0.00002 | Train Loss: 0.0469 | Train Acc: 98.59% | Test Loss: 0.2517 | Test Acc: 92.77% | Best Acc: 92.87%
Epoch [199/200] | LR: 0.00001 | Train Loss: 0.0461 | Train Acc: 98.64% | Test Loss: 0.2525 | Test Acc: 92.79% | Best Acc: 92.87%
Epoch [200/200] | LR: 0.00000 | Train Loss: 0.0459 | Train Acc: 98.68% | Test Loss: 0.2539 | Test Acc: 92.87% | Best Acc: 92.87%
----- Finished Training -----
Best Accuracy: 92.87&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>AI/Paper Review</category>
      <author>우연입니다</author>
      <guid isPermaLink="true">https://orchidbyw1.tistory.com/9</guid>
      <comments>https://orchidbyw1.tistory.com/9#entry9comment</comments>
      <pubDate>Sun, 29 Mar 2026 18:39:47 +0900</pubDate>
    </item>
    <item>
      <title>[Paper Review] ResNet Implementation</title>
      <link>https://orchidbyw1.tistory.com/8</link>
      <description>&lt;blockquote data-ke-style=&quot;style3&quot;&gt;After reading the ResNet paper, I implemented and trained ResNet56 myself for a CIFAR-10 classification experiment.&lt;br /&gt;Paper summary: &lt;a href=&quot;https://orchidbyw1.tistory.com/5&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot;&gt;https://orchidbyw1.tistory.com/5&lt;/a&gt;&lt;/blockquote&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Model Setup&lt;/span&gt;&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Code base: based on kuangliu/pytorch-cifar&lt;/li&gt;
&lt;li&gt;Dataset: CIFAR-10&lt;/li&gt;
&lt;li&gt;Model: ResNet56
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Based on BasicBlock&lt;/li&gt;
&lt;li&gt;Block counts: [9, 9, 9]&lt;/li&gt;
&lt;li&gt;Initial channels: 16&lt;/li&gt;
&lt;li&gt;Initial convolution: 3&amp;times;3, stride=1&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;ResNet56 follows the CIFAR version of the ResNet architecture: 3 stages with 9 BasicBlocks each.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;The initial convolution is also a &lt;span&gt;&lt;b&gt;3&amp;times;3 conv&lt;/b&gt;&lt;/span&gt; to fit the CIFAR-10 input size (32&amp;times;32), rather than the 7&amp;times;7 used in the ImageNet ResNets.&lt;/p&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Training Setup&lt;/span&gt;&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Epoch: 200&lt;/li&gt;
&lt;li&gt;Batch size: 128&lt;/li&gt;
&lt;li&gt;Optimizer: SGD&lt;/li&gt;
&lt;li&gt;Learning rate: 0.1&lt;/li&gt;
&lt;li&gt;Momentum: 0.9&lt;/li&gt;
&lt;li&gt;Weight decay: 5e-4&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Training followed, as closely as possible, the settings frequently used in the ResNet paper and CIFAR implementations.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;In particular, SGD with momentum, plus weight decay, were used so that training stays stable.&lt;/p&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Data Preprocessing&lt;/span&gt;&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;RandomCrop(32, padding=4)&lt;/li&gt;
&lt;li&gt;RandomHorizontalFlip&lt;/li&gt;
&lt;li&gt;Normalize: (0.4914, 0.4822, 0.4465) / (0.2023, 0.1994, 0.2010)&lt;/li&gt;
&lt;/ul&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Model Implementation&lt;/blockquote&gt;
&lt;pre id=&quot;code_1774776644279&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import torch
import torch.nn as nn
import torch.nn.functional as F

class LambdaLayer(nn.Module):
  # helper that wraps an arbitrary function as a module (unused in this implementation)
  def __init__(self, lambd):
    super(LambdaLayer, self).__init__()
    self.lambd = lambd

  def forward(self, x):
    return self.lambd(x)

def conv3x3(in_planes, planes, stride=1):
  return nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)

def conv1x1(in_planes, planes, stride=1):
  return nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, padding=0, bias=False)

class BasicBlock(nn.Module): # inherits from nn.Module

  expansion = 1 # output channels unchanged

  def __init__(self, in_planes, planes, stride=1): # stride defaults to 1, keeping the spatial size
    super(BasicBlock, self).__init__() # initialize nn.Module first

    self.conv1 = conv3x3(in_planes, planes, stride)
    self.bn1 = nn.BatchNorm2d(planes)

    self.conv2 = conv3x3(planes, planes)
    self.bn2 = nn.BatchNorm2d(planes)

    self.shortcut = nn.Sequential() # identity by default

    # the shortcut is added to the output later (F(x)+x)
    if stride != 1 or in_planes != planes: # when the dimensions do not match
      # projection shortcut = match dimensions with a 1x1 conv
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(self.expansion*planes)
      )

  def forward(self, x): # block forward pass
    out = self.conv1(x)
    out = self.bn1(out)
    out = F.relu(out)
    out = self.conv2(out)
    out = self.bn2(out)
    out += self.shortcut(x) # add the shortcut (identity or projection)
    out = F.relu(out)
    return out

class BottleNeck(nn.Module):

  expansion = 4 # channels expand 4x

  def __init__(self, in_planes, planes, stride=1): # stride defaults to 1, keeping the spatial size
    super(BottleNeck, self).__init__() # initialize nn.Module first

    # 1x1 convolution -&amp;gt; reduce channels
    self.conv1 = conv1x1(in_planes, planes, stride=1)
    self.bn1 = nn.BatchNorm2d(planes)

    # 3x3 convolution -&amp;gt; do the work on the reduced channels
    self.conv2 = conv3x3(planes, planes, stride)
    self.bn2 = nn.BatchNorm2d(planes)

    # 1x1 convolution -&amp;gt; expand the channels back
    self.conv3 = conv1x1(planes, self.expansion * planes, stride=1)
    self.bn3 = nn.BatchNorm2d(self.expansion * planes)

    self.shortcut = nn.Sequential() # identity by default

    # the shortcut is added to the output later (F(x)+x)
    if stride != 1 or in_planes != self.expansion * planes: # when the dimensions do not match
      # projection shortcut = match dimensions with a 1x1 conv
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(self.expansion*planes)
      )

  def forward(self, x): # 블록 출력
    out = self.conv1(x)
    out = self.bn1(out)
    out = F.relu(out)
    out = self.conv2(out)
    out = self.bn2(out)
    out = F.relu(out)
    out = self.conv3(out)
    out = self.bn3(out)
    out += self.shortcut(x)
    out = F.relu(out)
    return out

class ResNet(nn.Module):
  def __init__(self, block, num_blocks, num_classes=10): # CIFAR-10 has 10 classes
    super(ResNet, self).__init__() # initialize nn.Module first
    self.in_planes = 16 # input channel count

    # structure from the ResNet paper (CIFAR variant).
    self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(self.in_planes)
    self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)

    # FC layer -&amp;gt; width unchanged for BasicBlock, multiplied by 4 for BottleNeck
    self.linear = nn.Linear(64 * block.expansion, num_classes)

  def _make_layer(self, block, planes, num_blocks, stride):
    strides = [stride] + [1]*(num_blocks-1) # only the first block downsamples; the rest keep the size
    layers = []
    for stride in strides: # build one block per stride
      layers.append(block(self.in_planes, planes, stride))
      self.in_planes = planes * block.expansion # update the channel count for the next block

    return nn.Sequential(*layers) # chain the blocks

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x))) # initial feature extraction
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = F.avg_pool2d(out, out.size()[3]) # global average pooling (HxW -&amp;gt; 1x1)
    out = out.view(out.size(0), -1) # flatten
    out = self.linear(out) # fully connected classifier
    return out

def ResNet56(): # 6n + 2 layers with n = 9 -&amp;gt; 56 layers (used in the training script below)
  return ResNet(BasicBlock, [9, 9, 9])&lt;/code&gt;&lt;/pre&gt;
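&lt;p data-ke-size=&quot;size16&quot;&gt;As a quick sanity check, the sketch below (my own addition, not part of the training script) pushes a dummy CIFAR-sized batch through the model and counts parameters; ResNet-56 should come out at roughly 0.85M parameters.&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import torch

model = ResNet56()
x = torch.randn(2, 3, 32, 32) # two fake 32x32 RGB images
y = model(x)
print(y.shape) # torch.Size([2, 10]) -&amp;gt; one logit per CIFAR-10 class

n_params = sum(p.numel() for p in model.parameters())
print(f&quot;parameters: {n_params:,}&quot;) # roughly 0.85M for ResNet-56&lt;/code&gt;&lt;/pre&gt;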
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Training and Testing&lt;/blockquote&gt;
&lt;pre id=&quot;code_1774776694529&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.backends.cudnn as cudnn

# ----- basic setup -----
device = 'cuda' if torch.cuda.is_available() else 'cpu' # use the GPU when available

best_acc = 0  # best test accuracy
num_epochs = 200 # number of epochs

# ----- data preprocessing -----
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4), # random crop
    transforms.RandomHorizontalFlip(), # random left-right flip
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]) # train -&amp;gt; augment for more varied training data
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]) # test -&amp;gt; only tensor conversion and normalization, for fair evaluation

# ----- datasets -----
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128, # train on 128 images at a time
    shuffle=True, # shuffle for training
    num_workers=2
    )

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=100, # evaluate 100 images at a time
    shuffle=False, # no need to shuffle the test set
    num_workers=2
    )

# ----- model -----
net = ResNet56()
net = net.to(device)
if device == 'cuda':
    net = torch.nn.DataParallel(net) # data-parallel across available GPUs
    cudnn.benchmark = True

# ----- loss / optimizer / scheduler -----
criterion = nn.CrossEntropyLoss() # standard classification loss
optimizer = optim.SGD( # SGD optimizer
    net.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4
    )
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs) # learning-rate schedule -&amp;gt; large steps early, fine adjustments late

# ----- training -----
def train():
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader): # iterate over the training data batch by batch
        inputs, targets = inputs.to(device), targets.to(device) # move inputs and labels to the device
        optimizer.zero_grad() # clear the previous gradients
        outputs = net(inputs) # forward propagation
        loss = criterion(outputs, targets) # compute the loss
        loss.backward() # compute the gradients
        optimizer.step() # update the weights

        train_loss += loss.item() # accumulate the loss
        _, predicted = outputs.max(1) # predicted class per image
        total += targets.size(0) # running accuracy bookkeeping
        correct += predicted.eq(targets).sum().item()

    avg_train_loss = train_loss / len(trainloader)
    train_acc = 100. * correct / total

    return avg_train_loss, train_acc

# ----- testing -----
def test():
    global best_acc # to track the best accuracy so far
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad(): # no gradients needed at test time
        for batch_idx, (inputs, targets) in enumerate(testloader): # iterate over the test data batch by batch
            inputs, targets = inputs.to(device), targets.to(device) # move inputs and labels to the device
            outputs = net(inputs) # prediction only, no training
            loss = criterion(outputs, targets) # compute the loss

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0) # running test-accuracy bookkeeping
            correct += predicted.eq(targets).sum().item()

    avg_test_loss = test_loss / len(testloader)
    test_acc = 100. * correct / total

    # update best_acc whenever the current test accuracy beats it
    if test_acc &amp;gt; best_acc:
        best_acc = test_acc

    return avg_test_loss, test_acc

# ----- main loop -----
print(&quot;----- Start Training -----&quot;)
for epoch in range(num_epochs):
    train_loss, train_acc = train()
    test_loss, test_acc = test()
    scheduler.step()

    current_lr = optimizer.param_groups[0]['lr']

    print(f&quot;Epoch [{epoch+1}/{num_epochs}] | &quot;
          f&quot;LR: {current_lr:.5f} | &quot;
          f&quot;Train Loss: {train_loss:.4f} | &quot;
          f&quot;Train Acc: {train_acc:.2f}% | &quot;
          f&quot;Test Loss: {test_loss:.4f} | &quot;
          f&quot;Test Acc: {test_acc:.2f}% | &quot;
          f&quot;Best Acc: {best_acc:.2f}%&quot;)

# ----- final output -----
print(&quot;----- Finished Training -----&quot;)
print(&quot;Best Accuracy:&quot;, best_acc)&lt;/code&gt;&lt;/pre&gt;
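&lt;p data-ke-size=&quot;size16&quot;&gt;For reference, CosineAnnealingLR with the default $\eta_{min} = 0$ decays the learning rate as $\eta_t = \frac{1}{2}\eta_0 \left( 1 + \cos\frac{t\pi}{T_{max}} \right)$, which is exactly the LR column in the log below (e.g. epoch 100 of 200 gives $0.1 \cdot \frac{1}{2}(1 + \cos\frac{\pi}{2}) = 0.05$).&lt;/p&gt;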
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Result&lt;br /&gt;Best&amp;nbsp;Accuracy:&amp;nbsp;94.14&lt;/blockquote&gt;
&lt;pre id=&quot;code_1774776713546&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;Epoch [1/200] | LR: 0.09999 | Train Loss: 1.8704 | Train Acc: 30.02% | Test Loss: 1.5896 | Test Acc: 39.61% | Best Acc: 39.61%
Epoch [2/200] | LR: 0.09998 | Train Loss: 1.3654 | Train Acc: 49.97% | Test Loss: 1.2682 | Test Acc: 54.70% | Best Acc: 54.70%
Epoch [3/200] | LR: 0.09994 | Train Loss: 1.0178 | Train Acc: 63.67% | Test Loss: 1.0431 | Test Acc: 63.66% | Best Acc: 63.66%
Epoch [4/200] | LR: 0.09990 | Train Loss: 0.8334 | Train Acc: 70.96% | Test Loss: 0.7980 | Test Acc: 72.69% | Best Acc: 72.69%
Epoch [5/200] | LR: 0.09985 | Train Loss: 0.7221 | Train Acc: 74.96% | Test Loss: 0.8423 | Test Acc: 71.44% | Best Acc: 72.69%
Epoch [6/200] | LR: 0.09978 | Train Loss: 0.6727 | Train Acc: 76.92% | Test Loss: 0.9195 | Test Acc: 70.06% | Best Acc: 72.69%
Epoch [7/200] | LR: 0.09970 | Train Loss: 0.6115 | Train Acc: 79.07% | Test Loss: 0.7322 | Test Acc: 76.41% | Best Acc: 76.41%
Epoch [8/200] | LR: 0.09961 | Train Loss: 0.5909 | Train Acc: 79.73% | Test Loss: 0.7539 | Test Acc: 74.35% | Best Acc: 76.41%
Epoch [9/200] | LR: 0.09950 | Train Loss: 0.5707 | Train Acc: 80.42% | Test Loss: 0.8039 | Test Acc: 73.04% | Best Acc: 76.41%
Epoch [10/200] | LR: 0.09938 | Train Loss: 0.5587 | Train Acc: 80.62% | Test Loss: 0.7282 | Test Acc: 74.82% | Best Acc: 76.41%
Epoch [11/200] | LR: 0.09926 | Train Loss: 0.5383 | Train Acc: 81.35% | Test Loss: 0.7245 | Test Acc: 74.94% | Best Acc: 76.41%
Epoch [12/200] | LR: 0.09911 | Train Loss: 0.5244 | Train Acc: 82.04% | Test Loss: 1.0138 | Test Acc: 70.32% | Best Acc: 76.41%
Epoch [13/200] | LR: 0.09896 | Train Loss: 0.5212 | Train Acc: 82.29% | Test Loss: 0.6327 | Test Acc: 78.59% | Best Acc: 78.59%
Epoch [14/200] | LR: 0.09880 | Train Loss: 0.5057 | Train Acc: 82.51% | Test Loss: 0.6321 | Test Acc: 78.50% | Best Acc: 78.59%
Epoch [15/200] | LR: 0.09862 | Train Loss: 0.4955 | Train Acc: 83.00% | Test Loss: 0.7027 | Test Acc: 76.71% | Best Acc: 78.59%
Epoch [16/200] | LR: 0.09843 | Train Loss: 0.4929 | Train Acc: 83.08% | Test Loss: 0.6291 | Test Acc: 78.81% | Best Acc: 78.81%
Epoch [17/200] | LR: 0.09823 | Train Loss: 0.4821 | Train Acc: 83.47% | Test Loss: 0.6794 | Test Acc: 76.45% | Best Acc: 78.81%
Epoch [18/200] | LR: 0.09801 | Train Loss: 0.4766 | Train Acc: 83.60% | Test Loss: 0.8408 | Test Acc: 73.87% | Best Acc: 78.81%
Epoch [19/200] | LR: 0.09779 | Train Loss: 0.4705 | Train Acc: 83.92% | Test Loss: 0.7186 | Test Acc: 77.51% | Best Acc: 78.81%
Epoch [20/200] | LR: 0.09755 | Train Loss: 0.4585 | Train Acc: 84.24% | Test Loss: 0.5942 | Test Acc: 79.31% | Best Acc: 79.31%
Epoch [21/200] | LR: 0.09730 | Train Loss: 0.4635 | Train Acc: 84.20% | Test Loss: 1.1453 | Test Acc: 67.83% | Best Acc: 79.31%
Epoch [22/200] | LR: 0.09704 | Train Loss: 0.4571 | Train Acc: 84.27% | Test Loss: 0.5517 | Test Acc: 81.83% | Best Acc: 81.83%
Epoch [23/200] | LR: 0.09677 | Train Loss: 0.4495 | Train Acc: 84.47% | Test Loss: 0.6970 | Test Acc: 77.01% | Best Acc: 81.83%
Epoch [24/200] | LR: 0.09649 | Train Loss: 0.4446 | Train Acc: 84.70% | Test Loss: 0.6313 | Test Acc: 79.19% | Best Acc: 81.83%
Epoch [25/200] | LR: 0.09619 | Train Loss: 0.4483 | Train Acc: 84.79% | Test Loss: 0.5516 | Test Acc: 81.04% | Best Acc: 81.83%
Epoch [26/200] | LR: 0.09589 | Train Loss: 0.4428 | Train Acc: 84.79% | Test Loss: 0.5374 | Test Acc: 82.20% | Best Acc: 82.20%
Epoch [27/200] | LR: 0.09557 | Train Loss: 0.4351 | Train Acc: 84.94% | Test Loss: 0.5838 | Test Acc: 80.79% | Best Acc: 82.20%
Epoch [28/200] | LR: 0.09524 | Train Loss: 0.4380 | Train Acc: 84.96% | Test Loss: 0.5187 | Test Acc: 82.21% | Best Acc: 82.21%
Epoch [29/200] | LR: 0.09490 | Train Loss: 0.4290 | Train Acc: 85.23% | Test Loss: 0.6226 | Test Acc: 79.02% | Best Acc: 82.21%
Epoch [30/200] | LR: 0.09455 | Train Loss: 0.4206 | Train Acc: 85.38% | Test Loss: 0.6244 | Test Acc: 79.53% | Best Acc: 82.21%
Epoch [31/200] | LR: 0.09419 | Train Loss: 0.4232 | Train Acc: 85.44% | Test Loss: 0.6699 | Test Acc: 77.67% | Best Acc: 82.21%
Epoch [32/200] | LR: 0.09382 | Train Loss: 0.4248 | Train Acc: 85.30% | Test Loss: 1.0871 | Test Acc: 67.65% | Best Acc: 82.21%
Epoch [33/200] | LR: 0.09343 | Train Loss: 0.4143 | Train Acc: 85.89% | Test Loss: 0.6232 | Test Acc: 79.71% | Best Acc: 82.21%
Epoch [34/200] | LR: 0.09304 | Train Loss: 0.4164 | Train Acc: 85.61% | Test Loss: 0.5493 | Test Acc: 81.49% | Best Acc: 82.21%
Epoch [35/200] | LR: 0.09263 | Train Loss: 0.4059 | Train Acc: 86.19% | Test Loss: 0.4843 | Test Acc: 83.65% | Best Acc: 83.65%
Epoch [36/200] | LR: 0.09222 | Train Loss: 0.4108 | Train Acc: 85.79% | Test Loss: 0.8074 | Test Acc: 75.77% | Best Acc: 83.65%
Epoch [37/200] | LR: 0.09179 | Train Loss: 0.4100 | Train Acc: 85.93% | Test Loss: 0.6999 | Test Acc: 77.58% | Best Acc: 83.65%
Epoch [38/200] | LR: 0.09135 | Train Loss: 0.4032 | Train Acc: 86.04% | Test Loss: 0.5932 | Test Acc: 81.47% | Best Acc: 83.65%
Epoch [39/200] | LR: 0.09091 | Train Loss: 0.4029 | Train Acc: 85.99% | Test Loss: 0.7058 | Test Acc: 76.48% | Best Acc: 83.65%
Epoch [40/200] | LR: 0.09045 | Train Loss: 0.3990 | Train Acc: 86.28% | Test Loss: 0.5020 | Test Acc: 82.97% | Best Acc: 83.65%
Epoch [41/200] | LR: 0.08998 | Train Loss: 0.3994 | Train Acc: 86.19% | Test Loss: 0.5176 | Test Acc: 82.89% | Best Acc: 83.65%
Epoch [42/200] | LR: 0.08951 | Train Loss: 0.3965 | Train Acc: 86.46% | Test Loss: 0.6732 | Test Acc: 78.89% | Best Acc: 83.65%
Epoch [43/200] | LR: 0.08902 | Train Loss: 0.3889 | Train Acc: 86.57% | Test Loss: 0.8505 | Test Acc: 73.45% | Best Acc: 83.65%
Epoch [44/200] | LR: 0.08853 | Train Loss: 0.3928 | Train Acc: 86.56% | Test Loss: 0.5829 | Test Acc: 80.82% | Best Acc: 83.65%
Epoch [45/200] | LR: 0.08802 | Train Loss: 0.3912 | Train Acc: 86.71% | Test Loss: 0.6275 | Test Acc: 79.69% | Best Acc: 83.65%
Epoch [46/200] | LR: 0.08751 | Train Loss: 0.3901 | Train Acc: 86.56% | Test Loss: 0.6015 | Test Acc: 80.96% | Best Acc: 83.65%
Epoch [47/200] | LR: 0.08698 | Train Loss: 0.3921 | Train Acc: 86.41% | Test Loss: 0.5468 | Test Acc: 82.52% | Best Acc: 83.65%
Epoch [48/200] | LR: 0.08645 | Train Loss: 0.3822 | Train Acc: 87.00% | Test Loss: 0.4998 | Test Acc: 83.00% | Best Acc: 83.65%
Epoch [49/200] | LR: 0.08591 | Train Loss: 0.3840 | Train Acc: 86.79% | Test Loss: 0.7584 | Test Acc: 75.47% | Best Acc: 83.65%
Epoch [50/200] | LR: 0.08536 | Train Loss: 0.3787 | Train Acc: 87.02% | Test Loss: 0.8301 | Test Acc: 73.35% | Best Acc: 83.65%
Epoch [51/200] | LR: 0.08480 | Train Loss: 0.3775 | Train Acc: 87.15% | Test Loss: 0.6328 | Test Acc: 80.66% | Best Acc: 83.65%
Epoch [52/200] | LR: 0.08423 | Train Loss: 0.3707 | Train Acc: 87.25% | Test Loss: 0.6693 | Test Acc: 79.07% | Best Acc: 83.65%
Epoch [53/200] | LR: 0.08365 | Train Loss: 0.3746 | Train Acc: 87.12% | Test Loss: 0.5671 | Test Acc: 81.60% | Best Acc: 83.65%
Epoch [54/200] | LR: 0.08307 | Train Loss: 0.3754 | Train Acc: 87.08% | Test Loss: 0.4703 | Test Acc: 84.07% | Best Acc: 84.07%
Epoch [55/200] | LR: 0.08247 | Train Loss: 0.3695 | Train Acc: 87.38% | Test Loss: 0.6400 | Test Acc: 79.37% | Best Acc: 84.07%
Epoch [56/200] | LR: 0.08187 | Train Loss: 0.3648 | Train Acc: 87.56% | Test Loss: 0.5782 | Test Acc: 81.10% | Best Acc: 84.07%
Epoch [57/200] | LR: 0.08126 | Train Loss: 0.3640 | Train Acc: 87.54% | Test Loss: 0.6366 | Test Acc: 79.54% | Best Acc: 84.07%
Epoch [58/200] | LR: 0.08065 | Train Loss: 0.3615 | Train Acc: 87.55% | Test Loss: 0.5773 | Test Acc: 81.18% | Best Acc: 84.07%
Epoch [59/200] | LR: 0.08002 | Train Loss: 0.3578 | Train Acc: 87.72% | Test Loss: 0.5189 | Test Acc: 82.39% | Best Acc: 84.07%
Epoch [60/200] | LR: 0.07939 | Train Loss: 0.3570 | Train Acc: 87.66% | Test Loss: 0.4853 | Test Acc: 83.97% | Best Acc: 84.07%
Epoch [61/200] | LR: 0.07875 | Train Loss: 0.3589 | Train Acc: 87.60% | Test Loss: 0.5091 | Test Acc: 82.84% | Best Acc: 84.07%
Epoch [62/200] | LR: 0.07810 | Train Loss: 0.3539 | Train Acc: 87.89% | Test Loss: 0.8338 | Test Acc: 74.39% | Best Acc: 84.07%
Epoch [63/200] | LR: 0.07745 | Train Loss: 0.3509 | Train Acc: 87.89% | Test Loss: 0.6214 | Test Acc: 80.07% | Best Acc: 84.07%
Epoch [64/200] | LR: 0.07679 | Train Loss: 0.3497 | Train Acc: 87.88% | Test Loss: 0.5160 | Test Acc: 83.28% | Best Acc: 84.07%
Epoch [65/200] | LR: 0.07612 | Train Loss: 0.3502 | Train Acc: 88.02% | Test Loss: 0.6000 | Test Acc: 79.56% | Best Acc: 84.07%
Epoch [66/200] | LR: 0.07545 | Train Loss: 0.3450 | Train Acc: 88.17% | Test Loss: 0.4865 | Test Acc: 83.77% | Best Acc: 84.07%
Epoch [67/200] | LR: 0.07477 | Train Loss: 0.3445 | Train Acc: 88.15% | Test Loss: 0.4958 | Test Acc: 83.10% | Best Acc: 84.07%
Epoch [68/200] | LR: 0.07409 | Train Loss: 0.3424 | Train Acc: 88.16% | Test Loss: 0.4742 | Test Acc: 84.45% | Best Acc: 84.45%
Epoch [69/200] | LR: 0.07340 | Train Loss: 0.3394 | Train Acc: 88.39% | Test Loss: 0.6433 | Test Acc: 80.23% | Best Acc: 84.45%
Epoch [70/200] | LR: 0.07270 | Train Loss: 0.3410 | Train Acc: 88.28% | Test Loss: 0.6649 | Test Acc: 79.44% | Best Acc: 84.45%
Epoch [71/200] | LR: 0.07200 | Train Loss: 0.3343 | Train Acc: 88.66% | Test Loss: 0.5716 | Test Acc: 81.84% | Best Acc: 84.45%
Epoch [72/200] | LR: 0.07129 | Train Loss: 0.3275 | Train Acc: 88.83% | Test Loss: 0.5591 | Test Acc: 81.97% | Best Acc: 84.45%
Epoch [73/200] | LR: 0.07058 | Train Loss: 0.3315 | Train Acc: 88.53% | Test Loss: 0.8336 | Test Acc: 76.19% | Best Acc: 84.45%
Epoch [74/200] | LR: 0.06986 | Train Loss: 0.3324 | Train Acc: 88.63% | Test Loss: 0.5117 | Test Acc: 83.29% | Best Acc: 84.45%
Epoch [75/200] | LR: 0.06913 | Train Loss: 0.3266 | Train Acc: 88.81% | Test Loss: 0.4685 | Test Acc: 84.77% | Best Acc: 84.77%
Epoch [76/200] | LR: 0.06841 | Train Loss: 0.3251 | Train Acc: 88.72% | Test Loss: 0.5197 | Test Acc: 82.99% | Best Acc: 84.77%
Epoch [77/200] | LR: 0.06767 | Train Loss: 0.3252 | Train Acc: 88.87% | Test Loss: 0.5927 | Test Acc: 81.29% | Best Acc: 84.77%
Epoch [78/200] | LR: 0.06694 | Train Loss: 0.3251 | Train Acc: 88.87% | Test Loss: 0.4540 | Test Acc: 84.88% | Best Acc: 84.88%
Epoch [79/200] | LR: 0.06620 | Train Loss: 0.3126 | Train Acc: 89.17% | Test Loss: 0.6074 | Test Acc: 80.67% | Best Acc: 84.88%
Epoch [80/200] | LR: 0.06545 | Train Loss: 0.3188 | Train Acc: 88.89% | Test Loss: 0.4375 | Test Acc: 85.04% | Best Acc: 85.04%
Epoch [81/200] | LR: 0.06470 | Train Loss: 0.3096 | Train Acc: 89.33% | Test Loss: 0.4657 | Test Acc: 84.82% | Best Acc: 85.04%
Epoch [82/200] | LR: 0.06395 | Train Loss: 0.3152 | Train Acc: 89.29% | Test Loss: 0.4722 | Test Acc: 84.29% | Best Acc: 85.04%
Epoch [83/200] | LR: 0.06319 | Train Loss: 0.3120 | Train Acc: 89.28% | Test Loss: 0.3886 | Test Acc: 86.59% | Best Acc: 86.59%
Epoch [84/200] | LR: 0.06243 | Train Loss: 0.3077 | Train Acc: 89.42% | Test Loss: 0.5851 | Test Acc: 81.45% | Best Acc: 86.59%
Epoch [85/200] | LR: 0.06167 | Train Loss: 0.3044 | Train Acc: 89.42% | Test Loss: 0.4308 | Test Acc: 86.25% | Best Acc: 86.59%
Epoch [86/200] | LR: 0.06091 | Train Loss: 0.3076 | Train Acc: 89.48% | Test Loss: 0.4712 | Test Acc: 84.62% | Best Acc: 86.59%
Epoch [87/200] | LR: 0.06014 | Train Loss: 0.2938 | Train Acc: 89.94% | Test Loss: 0.4519 | Test Acc: 84.81% | Best Acc: 86.59%
Epoch [88/200] | LR: 0.05937 | Train Loss: 0.2982 | Train Acc: 89.78% | Test Loss: 0.4398 | Test Acc: 85.74% | Best Acc: 86.59%
Epoch [89/200] | LR: 0.05860 | Train Loss: 0.2944 | Train Acc: 89.95% | Test Loss: 0.4810 | Test Acc: 84.16% | Best Acc: 86.59%
Epoch [90/200] | LR: 0.05782 | Train Loss: 0.2890 | Train Acc: 90.10% | Test Loss: 0.3996 | Test Acc: 86.72% | Best Acc: 86.72%
Epoch [91/200] | LR: 0.05705 | Train Loss: 0.2895 | Train Acc: 90.01% | Test Loss: 0.4282 | Test Acc: 86.09% | Best Acc: 86.72%
Epoch [92/200] | LR: 0.05627 | Train Loss: 0.2882 | Train Acc: 90.09% | Test Loss: 0.4463 | Test Acc: 85.26% | Best Acc: 86.72%
Epoch [93/200] | LR: 0.05549 | Train Loss: 0.2797 | Train Acc: 90.37% | Test Loss: 0.3645 | Test Acc: 87.97% | Best Acc: 87.97%
Epoch [94/200] | LR: 0.05471 | Train Loss: 0.2817 | Train Acc: 90.21% | Test Loss: 0.4648 | Test Acc: 84.71% | Best Acc: 87.97%
Epoch [95/200] | LR: 0.05392 | Train Loss: 0.2778 | Train Acc: 90.43% | Test Loss: 0.4032 | Test Acc: 86.53% | Best Acc: 87.97%
Epoch [96/200] | LR: 0.05314 | Train Loss: 0.2782 | Train Acc: 90.47% | Test Loss: 0.4073 | Test Acc: 86.50% | Best Acc: 87.97%
Epoch [97/200] | LR: 0.05236 | Train Loss: 0.2721 | Train Acc: 90.72% | Test Loss: 0.4476 | Test Acc: 85.56% | Best Acc: 87.97%
Epoch [98/200] | LR: 0.05157 | Train Loss: 0.2686 | Train Acc: 90.71% | Test Loss: 0.5305 | Test Acc: 83.02% | Best Acc: 87.97%
Epoch [99/200] | LR: 0.05079 | Train Loss: 0.2652 | Train Acc: 90.86% | Test Loss: 0.4117 | Test Acc: 86.18% | Best Acc: 87.97%
Epoch [100/200] | LR: 0.05000 | Train Loss: 0.2593 | Train Acc: 90.94% | Test Loss: 0.4131 | Test Acc: 86.36% | Best Acc: 87.97%
Epoch [101/200] | LR: 0.04921 | Train Loss: 0.2626 | Train Acc: 90.93% | Test Loss: 0.3314 | Test Acc: 88.66% | Best Acc: 88.66%
Epoch [102/200] | LR: 0.04843 | Train Loss: 0.2600 | Train Acc: 91.02% | Test Loss: 0.4167 | Test Acc: 86.45% | Best Acc: 88.66%
Epoch [103/200] | LR: 0.04764 | Train Loss: 0.2557 | Train Acc: 91.12% | Test Loss: 0.4593 | Test Acc: 84.76% | Best Acc: 88.66%
Epoch [104/200] | LR: 0.04686 | Train Loss: 0.2562 | Train Acc: 91.21% | Test Loss: 0.5172 | Test Acc: 84.25% | Best Acc: 88.66%
Epoch [105/200] | LR: 0.04608 | Train Loss: 0.2528 | Train Acc: 91.34% | Test Loss: 0.4039 | Test Acc: 87.02% | Best Acc: 88.66%
Epoch [106/200] | LR: 0.04529 | Train Loss: 0.2435 | Train Acc: 91.61% | Test Loss: 0.4563 | Test Acc: 85.64% | Best Acc: 88.66%
Epoch [107/200] | LR: 0.04451 | Train Loss: 0.2470 | Train Acc: 91.50% | Test Loss: 0.4790 | Test Acc: 84.73% | Best Acc: 88.66%
Epoch [108/200] | LR: 0.04373 | Train Loss: 0.2439 | Train Acc: 91.61% | Test Loss: 0.4453 | Test Acc: 85.84% | Best Acc: 88.66%
Epoch [109/200] | LR: 0.04295 | Train Loss: 0.2358 | Train Acc: 91.91% | Test Loss: 0.6202 | Test Acc: 81.61% | Best Acc: 88.66%
Epoch [110/200] | LR: 0.04218 | Train Loss: 0.2391 | Train Acc: 91.60% | Test Loss: 0.3592 | Test Acc: 88.24% | Best Acc: 88.66%
Epoch [111/200] | LR: 0.04140 | Train Loss: 0.2358 | Train Acc: 91.88% | Test Loss: 0.4422 | Test Acc: 85.97% | Best Acc: 88.66%
Epoch [112/200] | LR: 0.04063 | Train Loss: 0.2304 | Train Acc: 92.07% | Test Loss: 0.3967 | Test Acc: 87.15% | Best Acc: 88.66%
Epoch [113/200] | LR: 0.03986 | Train Loss: 0.2230 | Train Acc: 92.15% | Test Loss: 0.3578 | Test Acc: 88.10% | Best Acc: 88.66%
Epoch [114/200] | LR: 0.03909 | Train Loss: 0.2253 | Train Acc: 92.28% | Test Loss: 0.3481 | Test Acc: 88.74% | Best Acc: 88.74%
Epoch [115/200] | LR: 0.03833 | Train Loss: 0.2229 | Train Acc: 92.30% | Test Loss: 0.3635 | Test Acc: 87.95% | Best Acc: 88.74%
Epoch [116/200] | LR: 0.03757 | Train Loss: 0.2178 | Train Acc: 92.64% | Test Loss: 0.3962 | Test Acc: 86.72% | Best Acc: 88.74%
Epoch [117/200] | LR: 0.03681 | Train Loss: 0.2141 | Train Acc: 92.62% | Test Loss: 0.3684 | Test Acc: 88.10% | Best Acc: 88.74%
Epoch [118/200] | LR: 0.03605 | Train Loss: 0.2149 | Train Acc: 92.58% | Test Loss: 0.3863 | Test Acc: 87.65% | Best Acc: 88.74%
Epoch [119/200] | LR: 0.03530 | Train Loss: 0.2080 | Train Acc: 93.04% | Test Loss: 0.4235 | Test Acc: 86.70% | Best Acc: 88.74%
Epoch [120/200] | LR: 0.03455 | Train Loss: 0.2075 | Train Acc: 92.72% | Test Loss: 0.4211 | Test Acc: 86.33% | Best Acc: 88.74%
Epoch [121/200] | LR: 0.03380 | Train Loss: 0.1995 | Train Acc: 93.19% | Test Loss: 0.4409 | Test Acc: 86.24% | Best Acc: 88.74%
Epoch [122/200] | LR: 0.03306 | Train Loss: 0.2008 | Train Acc: 93.05% | Test Loss: 0.3391 | Test Acc: 88.94% | Best Acc: 88.94%
Epoch [123/200] | LR: 0.03233 | Train Loss: 0.1937 | Train Acc: 93.25% | Test Loss: 0.3897 | Test Acc: 87.79% | Best Acc: 88.94%
Epoch [124/200] | LR: 0.03159 | Train Loss: 0.1899 | Train Acc: 93.53% | Test Loss: 0.3730 | Test Acc: 88.40% | Best Acc: 88.94%
Epoch [125/200] | LR: 0.03087 | Train Loss: 0.1910 | Train Acc: 93.44% | Test Loss: 0.3969 | Test Acc: 87.63% | Best Acc: 88.94%
Epoch [126/200] | LR: 0.03014 | Train Loss: 0.1879 | Train Acc: 93.49% | Test Loss: 0.3223 | Test Acc: 89.46% | Best Acc: 89.46%
Epoch [127/200] | LR: 0.02942 | Train Loss: 0.1796 | Train Acc: 93.77% | Test Loss: 0.3696 | Test Acc: 88.15% | Best Acc: 89.46%
Epoch [128/200] | LR: 0.02871 | Train Loss: 0.1817 | Train Acc: 93.77% | Test Loss: 0.3634 | Test Acc: 88.53% | Best Acc: 89.46%
Epoch [129/200] | LR: 0.02800 | Train Loss: 0.1758 | Train Acc: 93.91% | Test Loss: 0.3309 | Test Acc: 89.27% | Best Acc: 89.46%
Epoch [130/200] | LR: 0.02730 | Train Loss: 0.1664 | Train Acc: 94.24% | Test Loss: 0.4069 | Test Acc: 87.31% | Best Acc: 89.46%
Epoch [131/200] | LR: 0.02660 | Train Loss: 0.1647 | Train Acc: 94.27% | Test Loss: 0.3934 | Test Acc: 87.96% | Best Acc: 89.46%
Epoch [132/200] | LR: 0.02591 | Train Loss: 0.1645 | Train Acc: 94.36% | Test Loss: 0.3365 | Test Acc: 88.84% | Best Acc: 89.46%
Epoch [133/200] | LR: 0.02523 | Train Loss: 0.1644 | Train Acc: 94.43% | Test Loss: 0.3269 | Test Acc: 89.27% | Best Acc: 89.46%
Epoch [134/200] | LR: 0.02455 | Train Loss: 0.1557 | Train Acc: 94.64% | Test Loss: 0.3861 | Test Acc: 88.39% | Best Acc: 89.46%
Epoch [135/200] | LR: 0.02388 | Train Loss: 0.1521 | Train Acc: 94.75% | Test Loss: 0.3498 | Test Acc: 88.71% | Best Acc: 89.46%
Epoch [136/200] | LR: 0.02321 | Train Loss: 0.1484 | Train Acc: 94.89% | Test Loss: 0.3129 | Test Acc: 89.86% | Best Acc: 89.86%
Epoch [137/200] | LR: 0.02255 | Train Loss: 0.1493 | Train Acc: 94.82% | Test Loss: 0.4009 | Test Acc: 87.86% | Best Acc: 89.86%
Epoch [138/200] | LR: 0.02190 | Train Loss: 0.1441 | Train Acc: 95.07% | Test Loss: 0.3127 | Test Acc: 90.22% | Best Acc: 90.22%
Epoch [139/200] | LR: 0.02125 | Train Loss: 0.1354 | Train Acc: 95.32% | Test Loss: 0.3091 | Test Acc: 90.32% | Best Acc: 90.32%
Epoch [140/200] | LR: 0.02061 | Train Loss: 0.1325 | Train Acc: 95.51% | Test Loss: 0.3725 | Test Acc: 88.28% | Best Acc: 90.32%
Epoch [141/200] | LR: 0.01998 | Train Loss: 0.1346 | Train Acc: 95.29% | Test Loss: 0.3627 | Test Acc: 89.29% | Best Acc: 90.32%
Epoch [142/200] | LR: 0.01935 | Train Loss: 0.1333 | Train Acc: 95.40% | Test Loss: 0.3371 | Test Acc: 89.77% | Best Acc: 90.32%
Epoch [143/200] | LR: 0.01874 | Train Loss: 0.1267 | Train Acc: 95.67% | Test Loss: 0.3268 | Test Acc: 89.97% | Best Acc: 90.32%
Epoch [144/200] | LR: 0.01813 | Train Loss: 0.1169 | Train Acc: 96.00% | Test Loss: 0.3035 | Test Acc: 90.57% | Best Acc: 90.57%
Epoch [145/200] | LR: 0.01753 | Train Loss: 0.1116 | Train Acc: 96.26% | Test Loss: 0.3486 | Test Acc: 89.36% | Best Acc: 90.57%
Epoch [146/200] | LR: 0.01693 | Train Loss: 0.1089 | Train Acc: 96.36% | Test Loss: 0.3249 | Test Acc: 90.37% | Best Acc: 90.57%
Epoch [147/200] | LR: 0.01635 | Train Loss: 0.1056 | Train Acc: 96.42% | Test Loss: 0.3061 | Test Acc: 90.72% | Best Acc: 90.72%
Epoch [148/200] | LR: 0.01577 | Train Loss: 0.1063 | Train Acc: 96.29% | Test Loss: 0.2971 | Test Acc: 90.70% | Best Acc: 90.72%
Epoch [149/200] | LR: 0.01520 | Train Loss: 0.1023 | Train Acc: 96.51% | Test Loss: 0.3229 | Test Acc: 90.46% | Best Acc: 90.72%
Epoch [150/200] | LR: 0.01464 | Train Loss: 0.0973 | Train Acc: 96.75% | Test Loss: 0.2958 | Test Acc: 90.63% | Best Acc: 90.72%
Epoch [151/200] | LR: 0.01409 | Train Loss: 0.0912 | Train Acc: 96.88% | Test Loss: 0.3111 | Test Acc: 90.89% | Best Acc: 90.89%
Epoch [152/200] | LR: 0.01355 | Train Loss: 0.0886 | Train Acc: 96.99% | Test Loss: 0.3453 | Test Acc: 90.12% | Best Acc: 90.89%
Epoch [153/200] | LR: 0.01302 | Train Loss: 0.0894 | Train Acc: 96.99% | Test Loss: 0.3304 | Test Acc: 90.50% | Best Acc: 90.89%
Epoch [154/200] | LR: 0.01249 | Train Loss: 0.0772 | Train Acc: 97.32% | Test Loss: 0.3473 | Test Acc: 90.27% | Best Acc: 90.89%
Epoch [155/200] | LR: 0.01198 | Train Loss: 0.0740 | Train Acc: 97.51% | Test Loss: 0.3099 | Test Acc: 91.22% | Best Acc: 91.22%
Epoch [156/200] | LR: 0.01147 | Train Loss: 0.0747 | Train Acc: 97.48% | Test Loss: 0.2906 | Test Acc: 91.66% | Best Acc: 91.66%
Epoch [157/200] | LR: 0.01098 | Train Loss: 0.0722 | Train Acc: 97.52% | Test Loss: 0.3124 | Test Acc: 91.13% | Best Acc: 91.66%
Epoch [158/200] | LR: 0.01049 | Train Loss: 0.0643 | Train Acc: 97.85% | Test Loss: 0.2882 | Test Acc: 91.87% | Best Acc: 91.87%
Epoch [159/200] | LR: 0.01002 | Train Loss: 0.0584 | Train Acc: 98.04% | Test Loss: 0.2906 | Test Acc: 91.93% | Best Acc: 91.93%
Epoch [160/200] | LR: 0.00955 | Train Loss: 0.0547 | Train Acc: 98.22% | Test Loss: 0.3313 | Test Acc: 91.04% | Best Acc: 91.93%
Epoch [161/200] | LR: 0.00909 | Train Loss: 0.0533 | Train Acc: 98.19% | Test Loss: 0.2875 | Test Acc: 91.98% | Best Acc: 91.98%
Epoch [162/200] | LR: 0.00865 | Train Loss: 0.0503 | Train Acc: 98.41% | Test Loss: 0.3468 | Test Acc: 90.91% | Best Acc: 91.98%
Epoch [163/200] | LR: 0.00821 | Train Loss: 0.0463 | Train Acc: 98.48% | Test Loss: 0.2914 | Test Acc: 92.06% | Best Acc: 92.06%
Epoch [164/200] | LR: 0.00778 | Train Loss: 0.0443 | Train Acc: 98.52% | Test Loss: 0.2973 | Test Acc: 92.22% | Best Acc: 92.22%
Epoch [165/200] | LR: 0.00737 | Train Loss: 0.0398 | Train Acc: 98.71% | Test Loss: 0.2802 | Test Acc: 92.41% | Best Acc: 92.41%
Epoch [166/200] | LR: 0.00696 | Train Loss: 0.0361 | Train Acc: 98.83% | Test Loss: 0.2740 | Test Acc: 92.83% | Best Acc: 92.83%
Epoch [167/200] | LR: 0.00657 | Train Loss: 0.0317 | Train Acc: 99.01% | Test Loss: 0.3497 | Test Acc: 91.37% | Best Acc: 92.83%
Epoch [168/200] | LR: 0.00618 | Train Loss: 0.0320 | Train Acc: 99.01% | Test Loss: 0.2840 | Test Acc: 92.56% | Best Acc: 92.83%
Epoch [169/200] | LR: 0.00581 | Train Loss: 0.0262 | Train Acc: 99.18% | Test Loss: 0.2827 | Test Acc: 92.73% | Best Acc: 92.83%
Epoch [170/200] | LR: 0.00545 | Train Loss: 0.0261 | Train Acc: 99.21% | Test Loss: 0.2716 | Test Acc: 93.24% | Best Acc: 93.24%
Epoch [171/200] | LR: 0.00510 | Train Loss: 0.0211 | Train Acc: 99.42% | Test Loss: 0.2826 | Test Acc: 92.70% | Best Acc: 93.24%
Epoch [172/200] | LR: 0.00476 | Train Loss: 0.0194 | Train Acc: 99.44% | Test Loss: 0.2722 | Test Acc: 93.42% | Best Acc: 93.42%
Epoch [173/200] | LR: 0.00443 | Train Loss: 0.0184 | Train Acc: 99.49% | Test Loss: 0.2948 | Test Acc: 92.79% | Best Acc: 93.42%
Epoch [174/200] | LR: 0.00411 | Train Loss: 0.0145 | Train Acc: 99.62% | Test Loss: 0.2787 | Test Acc: 93.19% | Best Acc: 93.42%
Epoch [175/200] | LR: 0.00381 | Train Loss: 0.0126 | Train Acc: 99.67% | Test Loss: 0.2611 | Test Acc: 93.67% | Best Acc: 93.67%
Epoch [176/200] | LR: 0.00351 | Train Loss: 0.0107 | Train Acc: 99.75% | Test Loss: 0.2790 | Test Acc: 93.17% | Best Acc: 93.67%
Epoch [177/200] | LR: 0.00323 | Train Loss: 0.0099 | Train Acc: 99.77% | Test Loss: 0.2756 | Test Acc: 93.30% | Best Acc: 93.67%
Epoch [178/200] | LR: 0.00296 | Train Loss: 0.0091 | Train Acc: 99.79% | Test Loss: 0.2672 | Test Acc: 93.72% | Best Acc: 93.72%
Epoch [179/200] | LR: 0.00270 | Train Loss: 0.0094 | Train Acc: 99.76% | Test Loss: 0.2689 | Test Acc: 93.78% | Best Acc: 93.78%
Epoch [180/200] | LR: 0.00245 | Train Loss: 0.0078 | Train Acc: 99.84% | Test Loss: 0.2639 | Test Acc: 93.89% | Best Acc: 93.89%
Epoch [181/200] | LR: 0.00221 | Train Loss: 0.0065 | Train Acc: 99.89% | Test Loss: 0.2650 | Test Acc: 93.89% | Best Acc: 93.89%
Epoch [182/200] | LR: 0.00199 | Train Loss: 0.0058 | Train Acc: 99.89% | Test Loss: 0.2626 | Test Acc: 93.96% | Best Acc: 93.96%
Epoch [183/200] | LR: 0.00177 | Train Loss: 0.0058 | Train Acc: 99.89% | Test Loss: 0.2586 | Test Acc: 94.09% | Best Acc: 94.09%
Epoch [184/200] | LR: 0.00157 | Train Loss: 0.0056 | Train Acc: 99.91% | Test Loss: 0.2633 | Test Acc: 93.89% | Best Acc: 94.09%
Epoch [185/200] | LR: 0.00138 | Train Loss: 0.0052 | Train Acc: 99.89% | Test Loss: 0.2589 | Test Acc: 94.03% | Best Acc: 94.09%
Epoch [186/200] | LR: 0.00120 | Train Loss: 0.0046 | Train Acc: 99.94% | Test Loss: 0.2636 | Test Acc: 93.97% | Best Acc: 94.09%
Epoch [187/200] | LR: 0.00104 | Train Loss: 0.0045 | Train Acc: 99.94% | Test Loss: 0.2642 | Test Acc: 93.89% | Best Acc: 94.09%
Epoch [188/200] | LR: 0.00089 | Train Loss: 0.0048 | Train Acc: 99.93% | Test Loss: 0.2621 | Test Acc: 93.98% | Best Acc: 94.09%
Epoch [189/200] | LR: 0.00074 | Train Loss: 0.0042 | Train Acc: 99.94% | Test Loss: 0.2552 | Test Acc: 94.05% | Best Acc: 94.09%
Epoch [190/200] | LR: 0.00062 | Train Loss: 0.0039 | Train Acc: 99.96% | Test Loss: 0.2598 | Test Acc: 94.03% | Best Acc: 94.09%
Epoch [191/200] | LR: 0.00050 | Train Loss: 0.0037 | Train Acc: 99.95% | Test Loss: 0.2601 | Test Acc: 93.97% | Best Acc: 94.09%
Epoch [192/200] | LR: 0.00039 | Train Loss: 0.0037 | Train Acc: 99.95% | Test Loss: 0.2597 | Test Acc: 94.04% | Best Acc: 94.09%
Epoch [193/200] | LR: 0.00030 | Train Loss: 0.0035 | Train Acc: 99.96% | Test Loss: 0.2555 | Test Acc: 94.00% | Best Acc: 94.09%
Epoch [194/200] | LR: 0.00022 | Train Loss: 0.0035 | Train Acc: 99.96% | Test Loss: 0.2573 | Test Acc: 93.99% | Best Acc: 94.09%
Epoch [195/200] | LR: 0.00015 | Train Loss: 0.0033 | Train Acc: 99.96% | Test Loss: 0.2584 | Test Acc: 93.98% | Best Acc: 94.09%
Epoch [196/200] | LR: 0.00010 | Train Loss: 0.0036 | Train Acc: 99.96% | Test Loss: 0.2587 | Test Acc: 93.98% | Best Acc: 94.09%
Epoch [197/200] | LR: 0.00006 | Train Loss: 0.0034 | Train Acc: 99.96% | Test Loss: 0.2570 | Test Acc: 94.01% | Best Acc: 94.09%
Epoch [198/200] | LR: 0.00002 | Train Loss: 0.0032 | Train Acc: 99.97% | Test Loss: 0.2590 | Test Acc: 94.00% | Best Acc: 94.09%
Epoch [199/200] | LR: 0.00001 | Train Loss: 0.0033 | Train Acc: 99.97% | Test Loss: 0.2583 | Test Acc: 94.01% | Best Acc: 94.09%
Epoch [200/200] | LR: 0.00000 | Train Loss: 0.0035 | Train Acc: 99.96% | Test Loss: 0.2581 | Test Acc: 94.14% | Best Acc: 94.14%
Finished Training
Best Accuracy: 94.14&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>AI/논문 리뷰</category>
      <author>우연입니다</author>
      <guid isPermaLink="true">https://orchidbyw1.tistory.com/8</guid>
      <comments>https://orchidbyw1.tistory.com/8#entry8comment</comments>
      <pubDate>Sun, 29 Mar 2026 18:39:40 +0900</pubDate>
    </item>
    <item>
      <title>[논문 리뷰] VGGNet</title>
      <link>https://orchidbyw1.tistory.com/7</link>
      <description>&lt;blockquote style=&quot;background-color: #fcfcfc; color: #666666; text-align: left;&quot; data-ke-style=&quot;style3&quot;&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;논문 링크:&lt;span&gt; &lt;a href=&quot;https://arxiv.org/pdf/1409.1556.pdf&quot; target=&quot;_blank&quot; rel=&quot;noopener&amp;nbsp;noreferrer&quot;&gt;https://arxiv.org/pdf/1409.1556.pdf&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;In short&lt;br /&gt;- An architecture that repeatedly stacks very small 3x3 convolution filters&lt;br /&gt;- Extracts features through a deep network (16-19 layers)&lt;br /&gt;- Simple and easy to understand (it just keeps stacking)&lt;br /&gt;- But the parameter count and computational cost are large&lt;/blockquote&gt;
&lt;h2 style=&quot;text-align: left;&quot; data-ke-size=&quot;size26&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;ABSTRACT&lt;/span&gt;&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;- Goal&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Analyze how the depth of a CNN affects its accuracy&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;- Method&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Increase depth by stacking many small 3&amp;times;3 convs&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;- Result&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Deeper networks perform better&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;INTRODUCTION&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Factors important for better performance:&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;small receptive fields&lt;/li&gt;
&lt;li&gt;small strides&lt;/li&gt;
&lt;li&gt;&lt;b&gt;increased depth (the key factor)&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Research direction: &lt;span style=&quot;background-color: #f6e199;&quot;&gt;experiment with increasing depth while using 3&amp;times;3 convs in every layer&lt;/span&gt;&lt;/p&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Architecture&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Input image&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;224 X 224 RGB&lt;/li&gt;
&lt;li&gt;the only preprocessing is subtracting the mean RGB value&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Conv layer&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;3&amp;times;3 convs throughout&lt;/li&gt;
&lt;li&gt;stride = 1, padding = 1&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Pooling layer&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;applied after blocks of conv layers; five max-pooling layers in total&lt;/li&gt;
&lt;li&gt;2 x 2 window, stride 2&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Fully Connected layer&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;4096 -&amp;gt; 4096 -&amp;gt; 1000 (three FC layers in total)&lt;/li&gt;
&lt;li&gt;a softmax layer at the end&lt;/li&gt;
&lt;/ul&gt;
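&lt;p data-ke-size=&quot;size16&quot;&gt;These hyper-parameters are easy to see in code. Below is a minimal PyTorch sketch of one VGG-style stage (a few 3&amp;times;3 convs followed by a 2&amp;times;2 max pool); the function name and channel counts are illustrative, not from the paper.&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import torch.nn as nn

def vgg_stage(in_ch, out_ch, n_convs=2):
    # stack n_convs 3x3 convs (stride 1, padding 1), then halve the resolution
    layers = []
    for i in range(n_convs):
        layers += [nn.Conv2d(in_ch if i == 0 else out_ch, out_ch,
                             kernel_size=3, stride=1, padding=1),
                   nn.ReLU(inplace=True)]
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

stage = vgg_stage(64, 128) # e.g. the second stage of configuration D&lt;/code&gt;&lt;/pre&gt;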
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Configurations&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- Depth: 11-19 layers&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- Models A-E&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1034&quot; data-origin-height=&quot;1206&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ofe2M/dJMcaiCKt3f/FklPTdq8eZRfZFefkhjwA0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ofe2M/dJMcaiCKt3f/FklPTdq8eZRfZFefkhjwA0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ofe2M/dJMcaiCKt3f/FklPTdq8eZRfZFefkhjwA0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fofe2M%2FdJMcaiCKt3f%2FFklPTdq8eZRfZFefkhjwA0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;507&quot; height=&quot;591&quot; data-origin-width=&quot;1034&quot; data-origin-height=&quot;1206&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; &lt;span style=&quot;background-color: #f6e199;&quot;&gt;even as depth grows, several small kernels remain more efficient than one large kernel&lt;/span&gt;&lt;/p&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Discussion&lt;/span&gt;&lt;/h3&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Why stack multiple 3x3 convs?&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;=&amp;gt; Three key reasons&lt;/p&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;&lt;b&gt;Same receptive field&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;two 3&amp;times;3 convs = the effect of one 5&amp;times;5&lt;/li&gt;
&lt;li&gt;three 3&amp;times;3 convs = the effect of one 7&amp;times;7&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;More non-linearity&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;the signal passes through ReLU several times&lt;/li&gt;
&lt;li&gt;so more complex functions can be learned&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Fewer parameters&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;for example:&lt;/li&gt;
&lt;li&gt;one 7x7 conv: many parameters&lt;/li&gt;
&lt;li&gt;three 3x3 convs: far fewer (worked out below)&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ol&gt;
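&lt;p data-ke-size=&quot;size16&quot;&gt;To make reason 3 concrete (the same calculation appears in the paper): with $C$ input and output channels, a single $7 \times 7$ conv layer has $7^2 C^2 = 49C^2$ weights, while a stack of three $3 \times 3$ conv layers has $3 \cdot 3^2 C^2 = 27C^2$, about 45% fewer, even though both cover the same $7 \times 7$ receptive field.&lt;/p&gt;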
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Experimental Results&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span&gt;(1) Increasing depth &amp;rarr; better performance&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Comparing 11-layer through 19-layer models, the deeper models reach higher accuracy&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span&gt;(2) Small-filter designs win&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Using several 3x3 filters instead of large 5x5 or 7x7 filters performs better&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span&gt;(3) LRN does not help&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;LRN (Local Response Normalization), carried over from AlexNet, barely affects performance&lt;/li&gt;
&lt;li&gt;so the deeper models (B-E) drop it&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span&gt;(4) Scale jittering &amp;rarr; better performance&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;scale jittering trains on images rescaled to varying sizes instead of one fixed size&lt;/li&gt;
&lt;li&gt;so the network learns features at multiple scales&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span&gt;(5) Multi-crop &amp;rarr; additional gains&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;complements dense evaluation because the two use different convolution boundary conditions&lt;/li&gt;
&lt;li&gt;surrounding image information is naturally reflected, so the network sees more varied context&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Conclusion&lt;/span&gt;&lt;/h3&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;&lt;u&gt;What was done&lt;/u&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;up to 19 weight layers&lt;/li&gt;
&lt;li&gt;large-scale image classification (ImageNet)&lt;/li&gt;
&lt;li&gt;experiments with very deep CNNs&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;u&gt;What this demonstrated&lt;/u&gt;&lt;br /&gt;: depth improves classification accuracy&lt;br /&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;keeping the &lt;b&gt;traditional ConvNet architecture&lt;/b&gt; and simply increasing depth is enough for state-of-the-art performance&lt;/li&gt;
&lt;li&gt;in other words, the network was just made deeper, with no special tricks&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;u&gt;It also generalizes well&lt;/u&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;the model works not only on ImageNet&lt;/li&gt;
&lt;li&gt;but also generalizes to other datasets and other tasks&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;In conclusion, &lt;span style=&quot;background-color: #f6e199;&quot;&gt;the paper confirms that depth is critically important for visual representations.&lt;/span&gt;&lt;/p&gt;
      <category>AI/논문 리뷰</category>
      <author>우연입니다</author>
      <guid isPermaLink="true">https://orchidbyw1.tistory.com/7</guid>
      <comments>https://orchidbyw1.tistory.com/7#entry7comment</comments>
      <pubDate>Sun, 29 Mar 2026 18:22:37 +0900</pubDate>
    </item>
    <item>
      <title>[논문 리뷰] MobileNetV2</title>
      <link>https://orchidbyw1.tistory.com/6</link>
      <description>&lt;blockquote style=&quot;background-color: #fcfcfc; color: #666666; text-align: left;&quot; data-ke-style=&quot;style3&quot;&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;논문 링크: &lt;span&gt;&lt;a href=&quot;https://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html&quot; target=&quot;_blank&quot; rel=&quot;noopener&amp;nbsp;noreferrer&quot;&gt;https://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;In short&lt;br /&gt;- MobileNetV2 is a lightweight CNN for mobile environments that,&lt;br /&gt;- through &lt;b&gt;inverted residuals&lt;/b&gt; and &lt;b&gt;linear bottlenecks&lt;/b&gt;,&lt;br /&gt;- reduces computation and memory while maintaining or improving accuracy.&lt;/blockquote&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #000000; background-color: #dddddd;&quot;&gt;Why MobileNetV2 Is Needed&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;- Existing CNNs (ResNet, VGG, etc.) perform well but are heavy&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;- In &lt;b&gt;mobile/embedded environments&lt;/b&gt;, computation, memory, and latency all matter&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;- MobileNetV1 already proposed a lightweight structure, but it had limits in accuracy and representational power&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;- So MobileNetV2's goal is &lt;b&gt;to stay lightweight (cutting computation and memory) while raising accuracy further&lt;/b&gt;!&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;Key Ideas of MobileNetV2&lt;/span&gt;&lt;/h3&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;1. &lt;span style=&quot;color: #ee2323;&quot;&gt;Depthwise Separable Convolution&lt;/span&gt;&lt;/b&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;: a way of splitting a standard convolution into two stages&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;[ standard convolution ]&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- input size: $ h_i \times w_i \times d_i $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- output channels: $ d_j $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- kernel size: $ k \times k $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- cost: $ h_i \cdot w_i \cdot d_i \cdot d_j \cdot k^2 $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;[ depthwise separable convolution ]&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;splits the standard convolution above into two stages&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;1) &lt;b&gt;depthwise convolution&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- convolves each input channel independently&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- no mixing across channels&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- cost: $ h_i \cdot w_i \cdot d_i \cdot k^2 $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;2) &lt;b&gt;pointwise convolution&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- linearly combines the channels&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- builds new features&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- cost: $ h_i \cdot w_i \cdot d_i \cdot d_j $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; total cost: $ h_i \cdot w_i \cdot d_i ( k^2 + d_j ) $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;With k = 3 this is roughly 8-9x fewer operations than a standard convolution.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;In other words, MobileNetV2 separates spatial filtering from channel mixing, which cuts the computation dramatically.&lt;/p&gt;
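&lt;p data-ke-size=&quot;size16&quot;&gt;A minimal PyTorch sketch of the two stages (my own illustration; the channel counts are made up): the &lt;b&gt;groups=d_i&lt;/b&gt; argument is what turns the first conv into a depthwise conv.&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import torch
import torch.nn as nn

d_i, d_j, k = 32, 64, 3 # illustrative channel counts and kernel size

# stage 1: one k x k filter per input channel (groups=d_i makes it depthwise)
depthwise = nn.Conv2d(d_i, d_i, kernel_size=k, padding=k // 2,
                      groups=d_i, bias=False)
# stage 2: 1x1 conv that linearly mixes the channels into d_j new features
pointwise = nn.Conv2d(d_i, d_j, kernel_size=1, bias=False)

x = torch.randn(1, d_i, 56, 56)
y = pointwise(depthwise(x))
print(y.shape) # torch.Size([1, 64, 56, 56])&lt;/code&gt;&lt;/pre&gt;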
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;2. &lt;span style=&quot;color: #ee2323;&quot;&gt;Linear Bottleneck&lt;/span&gt;&lt;/b&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;CNNs usually apply a &lt;b&gt;non-linear activation&lt;/b&gt; such as ReLU after each convolution.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;The paper observes, however, that although a CNN's activation tensor lives in a high-dimensional space, the actual data lies on a &lt;b&gt;low-dimensional manifold&lt;/b&gt;.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; That is, &lt;span style=&quot;background-color: #f6e199;&quot;&gt;the activation space is high-dimensional, but the &lt;b&gt;meaningful data (the manifold of interest)&lt;/b&gt; sits in a &lt;b&gt;low-dimensional subspace&lt;/b&gt;.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;In this regime a non-linear function like ReLU can destroy information.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;The loss can be especially severe when ReLU is applied in a bottleneck space with few channels.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;MobileNetV2 therefore&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- uses ReLU in the expanded space, and&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- &lt;span style=&quot;background-color: #f6e199;&quot;&gt;removes ReLU in the bottleneck (keeps it linear)&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;That is, the block follows this &lt;b&gt;Linear Bottleneck&lt;/b&gt; structure:&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style3&quot;&gt;Expand&amp;nbsp;&amp;rarr;&amp;nbsp;ReLU&amp;nbsp;&amp;rarr;&amp;nbsp;Depthwise&amp;nbsp;&amp;rarr;&amp;nbsp;ReLU&amp;nbsp;&amp;rarr;&amp;nbsp;Linear&amp;nbsp;Projection&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;3. &lt;span style=&quot;color: #ee2323;&quot;&gt;Inverted Residual&lt;/span&gt;&lt;/b&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;ResNet&lt;/b&gt;'s &lt;b&gt;residual block&lt;/b&gt; has the following structure.&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1044&quot; data-origin-height=&quot;472&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/xUGl9/dJMb99MnHYE/4GGnKae4Z3162We7kfxCU1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/xUGl9/dJMb99MnHYE/4GGnKae4Z3162We7kfxCU1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/xUGl9/dJMb99MnHYE/4GGnKae4Z3162We7kfxCU1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FxUGl9%2FdJMb99MnHYE%2F4GGnKae4Z3162We7kfxCU1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;557&quot; height=&quot;472&quot; data-origin-width=&quot;1044&quot; data-origin-height=&quot;472&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; That is, the shortcut connection is made in the wide, many-channel space.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;MobileNetV2&lt;/b&gt;, by contrast, uses the following structure.&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1077&quot; data-origin-height=&quot;472&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/33HVH/dJMcafToRTD/OLO8arVw0udYj2LK5Lz6rK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/33HVH/dJMcafToRTD/OLO8arVw0udYj2LK5Lz6rK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/33HVH/dJMcafToRTD/OLO8arVw0udYj2LK5Lz6rK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F33HVH%2FdJMcafToRTD%2FOLO8arVw0udYj2LK5Lz6rK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;563&quot; height=&quot;247&quot; data-origin-width=&quot;1077&quot; data-origin-height=&quot;472&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; This is called the&amp;nbsp;&lt;b&gt;Inverted Residual&lt;/b&gt; structure.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Compared side by side:&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- &lt;b&gt;Residual block&lt;/b&gt;: Wide &amp;rarr; Narrow &amp;rarr; Wide&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- &lt;b&gt;Inverted residual block&lt;/b&gt;: Narrow &amp;rarr; Wide &amp;rarr; Narrow&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;The key ideas:&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- &lt;span style=&quot;background-color: #f6e199;&quot;&gt;the genuinely important information lives in the&amp;nbsp;&lt;b&gt;bottleneck (the narrow channel space)&lt;/b&gt;&lt;/span&gt;;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- the expanded channel space serves as a&amp;nbsp;&lt;b&gt;working space for the non-linear transformation&lt;/b&gt;;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- &lt;span style=&quot;background-color: #f6e199;&quot;&gt;shortcuts connect the &lt;b&gt;narrow bottlenecks&lt;/b&gt;, not the expanded layers&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Comparing the operation counts (following the paper's comparison):&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- &lt;b&gt;Depthwise separable block&lt;/b&gt;: $ h_i \cdot w_i \cdot d_i ( k^2 + d_j ) $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;nbsp;&lt;b&gt;Inverted residual block&lt;/b&gt; (expansion ratio $t$): $ h_i \cdot w_i \cdot d_i \cdot t ( d_i + k^2 + d_j ) $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; &lt;span style=&quot;background-color: #f6e199;&quot;&gt;the inverted residual block carries an extra term, but because its input and output channel counts can be kept much smaller, it uses fewer operations in practice&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;In short, this structure improves memory efficiency and computational efficiency at the same time.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;The Bottleneck Block in Detail&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;MobileNetV2's basic building block is the&amp;nbsp;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;Bottleneck Residual Block&lt;/b&gt;&lt;/span&gt;.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;The paper writes the block as&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$ F(x) = [A \circ N \circ B]x $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- B: expansion (expand)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- N: depthwise + ReLU6&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- A: projection (linear)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;The structure:&lt;/p&gt;
&lt;pre id=&quot;code_1773590742242&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;x (k channels)
   │
1&amp;times;1 conv (expand)
   │
t&amp;middot;k channels
   │
3&amp;times;3 depthwise conv
   │
t&amp;middot;k channels
   │
1&amp;times;1 conv (projection)
   │
y (k channels)&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Here the&amp;nbsp;&lt;b&gt;expansion ratio t&lt;/b&gt; is usually set to&amp;nbsp;&lt;b&gt;6&lt;/b&gt;.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;The non-linearity is &lt;b&gt;ReLU6&lt;/b&gt;, i.e. $ \min(\max(0, x), 6) $: negatives become 0, values in 0-6 pass through, and values above 6 are clipped to 6.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; This activation is well suited to low-precision integer arithmetic on mobile hardware.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;592&quot; data-origin-height=&quot;628&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/uGOBs/dJMcahjuPLC/4txBuJPcZkX7zVVHPe2aA0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/uGOBs/dJMcahjuPLC/4txBuJPcZkX7zVVHPe2aA0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/uGOBs/dJMcahjuPLC/4txBuJPcZkX7zVVHPe2aA0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FuGOBs%2FdJMcahjuPLC%2F4txBuJPcZkX7zVVHPe2aA0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;398&quot; height=&quot;422&quot; data-origin-width=&quot;592&quot; data-origin-height=&quot;628&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;MobileNetV2 uses two block variants depending on the stride (both appear in the sketch below).&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- &lt;b&gt;stride 1&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;the inverted residual block includes the skip connection&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- &lt;b&gt;stride 2&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;the block structure is the same as stride 1, but the skip connection is omitted and the depthwise convolution downsamples with stride 2&lt;/p&gt;
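&lt;p data-ke-size=&quot;size16&quot;&gt;The sketch below is my own minimal PyTorch rendering of the block described above (not the official implementation): 1&amp;times;1 expansion with ReLU6, 3&amp;times;3 depthwise convolution with ReLU6, a linear 1&amp;times;1 projection, and a skip connection only in the stride-1, equal-channel case.&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import torch
import torch.nn as nn

class InvertedResidual(nn.Module):
    def __init__(self, in_ch, out_ch, stride=1, t=6):
        super().__init__()
        hidden = t * in_ch # expanded working space
        self.use_skip = (stride == 1 and in_ch == out_ch)
        self.block = nn.Sequential(
            # 1x1 expansion
            nn.Conv2d(in_ch, hidden, 1, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),
            # 3x3 depthwise (stride 2 here performs the downsampling)
            nn.Conv2d(hidden, hidden, 3, stride=stride, padding=1,
                      groups=hidden, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),
            # 1x1 linear projection -- no ReLU, to preserve the bottleneck
            nn.Conv2d(hidden, out_ch, 1, bias=False),
            nn.BatchNorm2d(out_ch),
        )

    def forward(self, x):
        out = self.block(x)
        return x + out if self.use_skip else out

block = InvertedResidual(24, 24, stride=1) # stride-1 variant with skip
y = block(torch.randn(1, 24, 56, 56))
print(y.shape) # torch.Size([1, 24, 56, 56])&lt;/code&gt;&lt;/pre&gt;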
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;전체 네트워크 구조&lt;/span&gt;&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;584&quot; data-origin-height=&quot;260&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bG1Lze/dJMcahp9glx/grvDGHnfokv9deNjbQtam1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bG1Lze/dJMcahp9glx/grvDGHnfokv9deNjbQtam1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bG1Lze/dJMcahp9glx/grvDGHnfokv9deNjbQtam1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbG1Lze%2FdJMcahp9glx%2FgrvDGHnfokv9deNjbQtam1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;417&quot; height=&quot;186&quot; data-origin-width=&quot;584&quot; data-origin-height=&quot;260&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;830&quot; data-origin-height=&quot;960&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/26263/dJMcaiihge5/onXIT4B8vnIbkUKOGceGT0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/26263/dJMcaiihge5/onXIT4B8vnIbkUKOGceGT0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/26263/dJMcaiihge5/onXIT4B8vnIbkUKOGceGT0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F26263%2FdJMcaiihge5%2FonXIT4B8vnIbkUKOGceGT0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;432&quot; height=&quot;500&quot; data-origin-width=&quot;830&quot; data-origin-height=&quot;960&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;MobileNetV2 전체 구조&lt;/b&gt;는 다음과 같다.&lt;/p&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;초기 3x3 convolution (32 filters)&lt;/li&gt;
&lt;li&gt;19개의 bottleneck residual block&lt;/li&gt;
&lt;li&gt;마지막 1x1 convolution&lt;/li&gt;
&lt;li&gt;global average pooling&lt;/li&gt;
&lt;li&gt;classifier&lt;/li&gt;
&lt;/ol&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;모든&amp;nbsp;&lt;b&gt;bottleneck block&lt;/b&gt;은 다음 특징을 가진다.&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;depthwise separable convolution&lt;/b&gt; 사용&lt;/li&gt;
&lt;li&gt;&lt;b&gt;inverted residual&lt;/b&gt; 구조&lt;/li&gt;
&lt;li&gt;&lt;b&gt;linear bottleneck&lt;/b&gt; 적용&lt;/li&gt;
&lt;li&gt;&lt;b&gt;ReLU6&lt;/b&gt; activation 사용&lt;/li&gt;
&lt;li&gt;kernel size = 3&amp;times;3&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;또한 MobileNetV2는 다음 두 하이퍼 파라미터를 통해 성능과 연산량을 조절할 수 있다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;1. &lt;b&gt;Input Resolution&lt;/b&gt;: 입력 이미지 크기 조절(이미지 크기 &amp;darr; &amp;rarr; 정보량 &amp;darr;)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;2. &lt;b&gt;Width Multiplier&lt;/b&gt;: 채널 수를 일정 비율로 줄임(채널 &amp;darr; &amp;rarr; 표현력 &amp;darr;)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;* 단, V1과 다르게, width multiplier &amp;lt; 1일 때, 마지막 conv에서는 multiplier 적용 안 함 (작은 모델에서의 성능 향상을 위해)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;역할 차이를 보자면,&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;resolution 줄이면 -&amp;gt; 공간 정보 감소 -&amp;gt; 이미지가 흐릿해짐&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;width 줄이면 -&amp;gt; feature 다양성 감소 -&amp;gt; 모델이 덜 똑똑해짐&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 두 파라미터를 조절해 &lt;b&gt;accuracy&lt;/b&gt;와 &lt;b&gt;연산량&lt;/b&gt; 사이의 trade-off를 조정할 수 있다.&lt;/p&gt;
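&lt;p data-ke-size=&quot;size16&quot;&gt;감을 잡기 위한 간단한 계산 예시. (연산량이 width multiplier와 해상도 배율 각각의 제곱에 대략 비례한다는 근사를 가정)&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# 연산량 비율 ~ (width multiplier)^2 * (해상도 배율)^2 근사 (가정)
def relative_madds(alpha=1.0, rho=1.0):
    return (alpha ** 2) * (rho ** 2)

print(relative_madds(alpha=0.75))     # 채널 25% 축소 -&amp;gt; 연산량 약 56%
print(relative_madds(rho=160 / 224))  # 입력 224 -&amp;gt; 160 -&amp;gt; 연산량 약 51%&lt;/code&gt;&lt;/pre&gt;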
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;연산량 및 메모리 효율 분석&lt;/span&gt;&lt;/h3&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;(1) 연산량 분석&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;MobileNetV2 bottleneck block의 연산량은 다음과 같이 계산된다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;$ h \cdot w \cdot d' \cdot t (d' + k^2 + d'') $&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;- d'&lt;/span&gt;&lt;span&gt;: 입력 채널&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;- d''&lt;/span&gt;&lt;span&gt;: 출력 채널&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;- t&lt;/span&gt;&lt;span&gt;: expansion ratio&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;- k&lt;/span&gt;&lt;span&gt;: kernel size&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;연산량은 세 개의 convolution을 더해서 계산된다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;1. Expansion Conv&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;&amp;nbsp; &amp;nbsp; $ h w d' (t d') = h w t d'^2 $&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;2. Depthwise Conv&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;&amp;nbsp; &amp;nbsp; $ h w (t d') k^2 $&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;3. Projection Conv&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;&amp;nbsp; &amp;nbsp; $ h w (t d') d'' $&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;전체 연산량: $ h w d' t (d' + k^2 + d'') $&lt;/p&gt;
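&lt;p data-ke-size=&quot;size16&quot;&gt;위 세 항을 코드로 더해 보면 본문 수식과 같은 값이 나온다. (숫자는 임의로 가정한 예시)&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def bottleneck_madds(h, w, d_in, d_out, t=6, k=3):
    expansion = h * w * d_in * (t * d_in)     # 1x1 expand
    depthwise = h * w * (t * d_in) * k * k    # 3x3 depthwise
    projection = h * w * (t * d_in) * d_out   # 1x1 projection
    return expansion + depthwise + projection

# h * w * d' * t * (d' + k^2 + d'') 와 동일한 값인지 확인
assert bottleneck_madds(56, 56, 24, 24) == 56 * 56 * 24 * 6 * (24 + 9 + 24)
print(bottleneck_madds(56, 56, 24, 24))  # 25740288&lt;/code&gt;&lt;/pre&gt;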
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;겉보기에는 convolution이 많아 보이지만,&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;채널 수가 작고, depthwise convolution이 사용되기 때문에&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;전체 연산량은 일반 residual block보다 작다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;(2)&amp;nbsp; 메모리 효율 분석&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;일반적인 딥러닝 프레임워크는 연산을&amp;nbsp;&lt;b&gt;DAG (Directed Acyclic Graph)&lt;/b&gt; 형태로 표현한다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이때 메모리 사용량은&amp;nbsp;&lt;b&gt;중간 tensor&lt;/b&gt; 크기에 크게 영향을 받는다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;MobileNetV2의 블록은 Narrow -&amp;gt; Wide -&amp;gt; Narrow 구조이므로&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Wide 텐서를 오랫동안 메모리에 유지할 필요가 없다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;즉, 확장된 tensor는 임시 계산용이며, 계산 후 바로 버릴 수 있다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;따라서 &lt;span style=&quot;background-color: #f6e199;&quot;&gt;전체 메모리 사용량은 &lt;b&gt;확장층(Wide)&lt;/b&gt;이 아니라 &lt;b&gt;bottleneck 크기(Narrow)&lt;/b&gt;에 의해 결정&lt;/span&gt;된다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 덕분에 작은 cache 메모리에서도 효율적으로 실행할 수 있다.&lt;/p&gt;
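&lt;p data-ke-size=&quot;size16&quot;&gt;텐서 크기로 비교해 보면 차이가 분명하다. (숫자는 임의로 가정한 예시)&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;h, w, k, t = 56, 56, 24, 6
narrow = h * w * k    # bottleneck 텐서: 블록 경계에서 유지해야 하는 크기
wide = h * w * t * k  # 확장 텐서: 블록 내부의 임시 계산용, 계산 후 바로 버림
print(narrow, wide)   # 75264 451584 -&amp;gt; 유지해야 하는 것은 narrow 쪽뿐&lt;/code&gt;&lt;/pre&gt;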
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;background-color: #dddddd;&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;논문의 의의와 한계&lt;/span&gt;&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;MobileNetV2의 가장 중요한 기여는 다음과 같다.&lt;/p&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;&lt;b&gt;inverted residual 구조 제안&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;linear bottleneck 구조 도입&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;연산량과 메모리 사용 감소&lt;/li&gt;
&lt;li&gt;모바일 환경에 적합한 CNN 구조 제시&lt;/li&gt;
&lt;/ol&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;특히 이 논문은&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;nbsp;&lt;b&gt;capacity (정보 저장 공간)&lt;/b&gt;&lt;span&gt;과&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;-&amp;nbsp;&lt;/span&gt;&lt;b&gt;expressiveness (비선형 표현력)&lt;/b&gt;&lt;span&gt;을 분리했다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;이 설계는 이후 많은 경량 CNN 설계에 영향을 미쳤다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;하지만 이 논문의 한계는 다음과 같다.&lt;/span&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;여전히 accuracy와 model size 사이 trade-off 존재&lt;/li&gt;
&lt;li&gt;고성능 서버용 모델보다는 모바일 환경에 최적화&lt;/li&gt;
&lt;li&gt;구조 이해가 비교적 복잡&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #000000; background-color: #dddddd;&quot;&gt;결론&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;MobileNetV2는 &lt;span&gt;&lt;b&gt;inverted residual&lt;/b&gt;&lt;/span&gt;과 &lt;span&gt;&lt;b&gt;linear bottleneck&lt;/b&gt;&lt;/span&gt;을 기반으로 한 경량 CNN 구조를 제안하였다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 구조는 다음과 같은 장점을 가진다.&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;연산량 감소&lt;/li&gt;
&lt;li&gt;메모리 효율 증가&lt;/li&gt;
&lt;li&gt;모바일 환경에서 높은 성능 유지&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;실험 결과에서도 MobileNetV2는&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;ImageNet classification&lt;/li&gt;
&lt;li&gt;Object detection&lt;/li&gt;
&lt;li&gt;Semantic segmentation&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;등 다양한 작업에서 &lt;span&gt;높은 효율성과 경쟁력 있는 성능&lt;/span&gt;을 보여주었다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;따라서 MobileNetV2는 &lt;b&gt;모바일 환경을 위한 대표적인 CNN 아키텍처&lt;/b&gt;로 평가된다.&lt;/p&gt;</description>
      <category>AI/논문 리뷰</category>
      <author>우연입니다</author>
      <guid isPermaLink="true">https://orchidbyw1.tistory.com/6</guid>
      <comments>https://orchidbyw1.tistory.com/6#entry6comment</comments>
      <pubDate>Mon, 16 Mar 2026 01:32:59 +0900</pubDate>
    </item>
    <item>
      <title>[논문 리뷰] ResNet</title>
      <link>https://orchidbyw1.tistory.com/5</link>
      <description>&lt;blockquote data-ke-style=&quot;style3&quot;&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;논문 링크: &lt;a href=&quot;https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf&quot;&gt;https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf&lt;/a&gt;&lt;/p&gt;
&lt;/blockquote&gt;
&lt;h2 style=&quot;text-align: left;&quot; data-ke-size=&quot;size26&quot;&gt;&lt;b&gt;Introduction&lt;/b&gt;&lt;/h2&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;왜 ResNet이 등장했는가?&lt;/h3&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;VGG를 통해서, 네트워크는 깊을수록 성능이 좋아진다는 것을 알게 되었다.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;그래서 layer를 점점 더 쌓았으나, depth가 깊어질수록 문제가 발생하였음.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #ee2323;&quot;&gt;Degradation Problem&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;네트워크가 깊어질수록&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;- test error만 증가하는 것이 아니라,&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;- training error도 증가하는 문제!&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;이건 overfitting의 문제가 아님. training error도 같이 높아졌으므로&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;*** 이론적으로는 말이 안 됨!&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;얕은 모델이 있고, 거기에 레이어를 더 쌓은 깊은 모델을 만든다면,&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;추가된 레이어를 &lt;b&gt;identity mapping&lt;/b&gt;으로 두면 깊은 모델은 얕은 모델과 같은 성능을 낼 수 있어야 함!&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;-&amp;gt; 즉, 깊은 모델의 training error는 절대 얕은 모델보다 나빠질 수 없음&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;gt;&amp;gt; 그렇지만 실제로는 더 나빠짐. 이게 바로 &lt;b&gt;degradation problem&lt;/b&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;*** 따라서 본 논문에서는 degradation problem을 해결하기 위해 residual learning을 이용하였고, 이 모델이 ResNet이다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;해결 아이디어: &lt;span style=&quot;color: #ee2323;&quot;&gt;Residual Learning&lt;/span&gt;&lt;/h3&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;- &lt;b&gt;기존 CNN&lt;/b&gt;은 이렇게 학습함: $H(x)$&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;-&amp;gt; 입력 x를 어떤 출력 $H(x)$로 직접 변환하는 방법&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;- &lt;b&gt;ResNet&lt;/b&gt;은 이렇게 바꿈:&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;$F(x) = H(x) - x$&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;즉, $H(x) = F(x) + x$&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;-&amp;gt; &lt;span style=&quot;background-color: #f6e199;&quot;&gt;전체 함수를 배우는 대신 입력 대비 &lt;b&gt;변화량(residual)&lt;/b&gt;을 학습하는 방법&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;왜 residual이 더 쉬운가?&lt;/h3&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;- identity가 최적일 경우&lt;/b&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;일반 CNN&lt;/b&gt;에서 identity를 만들려면:&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;$ W_2(\sigma(W_1 x)) = x $&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;이걸 여러 비선형 레이어로 구현해야 함.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;-&amp;gt; 굉장히 어려움!&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;- ResNet&lt;/b&gt;에서는 $ H(x) = F(x) + x $&lt;span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;따라서 identity가 최적이면&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;$ F(x) = 0 $&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;즉, 그냥 residual을 0으로 만들면 됨. 그럼 즉시 $ y = x $ (항등 함수, identity function)이 구현됨&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;-&amp;gt; 엄청 쉬움!&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;최적 함수가 identity 근처에 있을 가능성이 높음&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;*** 즉, identity를 기준으로 두면 최적화가 훨씬 쉬워짐&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;이를 위해 입력 x를 뒤로 보내는 shortcut connection을 제안하였다.&lt;/p&gt;
&lt;h4 style=&quot;text-align: left;&quot; data-ke-size=&quot;size20&quot;&gt;&lt;span&gt;구현: &lt;span style=&quot;color: #ee2323;&quot;&gt;Shortcut Connection&lt;/span&gt;&lt;/span&gt;&lt;/h4&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;768&quot; data-origin-height=&quot;414&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/d1MS4U/dJMcaaxDcoV/06L9oqkfX4VXV9HPzNCzs1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/d1MS4U/dJMcaaxDcoV/06L9oqkfX4VXV9HPzNCzs1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/d1MS4U/dJMcaaxDcoV/06L9oqkfX4VXV9HPzNCzs1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fd1MS4U%2FdJMcaaxDcoV%2F06L9oqkfX4VXV9HPzNCzs1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;581&quot; height=&quot;414&quot; data-origin-width=&quot;768&quot; data-origin-height=&quot;414&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;shortcut connection&lt;/b&gt;이란 두 개의 레이어를 건너뛰는 연결이다.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;위 그림처럼, &lt;span style=&quot;background-color: #f6e199;&quot;&gt;identity mapping(x)을 다음 레이어에 더해주는 것&lt;/span&gt;을 뜻한다.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;-&amp;gt; 즉, 입력 x를 두 개 conv 지나서 나온 F(x)에 더한다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;$ y = F(x) + x $ -&amp;gt; 이게 바로 shortcut&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;lt; shortcut connection 특징 &amp;gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;1. 파라미터 없음&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;2. 계산량 거의 증가 없음&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;3. gradient가 직접 흐름&lt;/p&gt;
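&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;$ y = F(x) + x $를 PyTorch로 옮기면 대략 다음과 같다. (입력과 출력의 shape이 같은 경우를 가정한 간단한 스케치이며, 클래스 이름과 세부 구성은 예시용 가정)&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import torch
import torch.nn as nn

class BasicBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        f = self.bn2(self.conv2(self.relu(self.bn1(self.conv1(x)))))  # F(x)
        return self.relu(f + x)  # shortcut: 파라미터 없이 x를 그대로 더함

x = torch.randn(1, 64, 56, 56)
print(BasicBlock(64)(x).shape)  # torch.Size([1, 64, 56, 56])&lt;/code&gt;&lt;/pre&gt;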
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h2 style=&quot;text-align: left;&quot; data-ke-size=&quot;size26&quot;&gt;&lt;b&gt;Deep Residual Learning&lt;/b&gt;&lt;/h2&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;1. Residual Learning&lt;/h3&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;위에서 설명했듯이,&amp;nbsp;&lt;b&gt;Residual Learning&lt;/b&gt;은 $H(x) = F(x) + x$을 학습하게 된다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;그러면, $F(x) = H(x) - x$&lt;span style=&quot;background-color: #ffffff; text-align: start;&quot;&gt;가 되고, 이를 &lt;b&gt;residual(잔차)&lt;/b&gt;이라 한다.&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #000000; text-align: start;&quot;&gt;생각해보면, x는 학습대상이 아니므로 위에서 나온 block은 결국 residual을 학습하게 된다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #000000; text-align: start;&quot;&gt;따라서, 이를 &lt;b&gt;residual learning&lt;/b&gt;이라 한다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;2. Identity Mapping by Shortcuts&lt;/h3&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Residual Learning&lt;/b&gt;을 수식으로 적으면 다음과 같다.&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;444&quot; data-origin-height=&quot;100&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/b8MiWx/dJMcadA2ASp/YpZkF2S7MrcDw9BilNRo30/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/b8MiWx/dJMcadA2ASp/YpZkF2S7MrcDw9BilNRo30/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/b8MiWx/dJMcadA2ASp/YpZkF2S7MrcDw9BilNRo30/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb8MiWx%2FdJMcadA2ASp%2FYpZkF2S7MrcDw9BilNRo30%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;297&quot; height=&quot;100&quot; data-origin-width=&quot;444&quot; data-origin-height=&quot;100&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;여기서 $x$와 $y$는 각각 레이어의 입력과 출력이다.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;$F$는 &lt;b&gt;residual mapping&lt;/b&gt;에 해당하고, $x$는 &lt;b&gt;identity mapping&lt;/b&gt;에 해당한다.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;이 둘을 &lt;b&gt;shortcut connection&lt;/b&gt;으로 더해주는데 이는 &lt;b&gt;element-wise addition&lt;/b&gt;이다.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;따라서, &lt;span style=&quot;color: #000000; background-color: #f6e199;&quot;&gt;$F$와 $x$의 feature map 크기와 Channel의 개수는 동일해야 &lt;/span&gt;한다.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;- feature map의 크기를 맞추기 위해서 residual mapping 쪽 Conv 레이어에 Padding을 적용시킨다.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;- channel의 개수를 맞추기 위해서 identity mapping에 linear projection을 적용시킨다.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;3. Network Architecture&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;586&quot; data-origin-height=&quot;1344&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/Hz3yK/dJMcacvsFca/pkkMHe88CJUSHK7mUCjbv0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/Hz3yK/dJMcacvsFca/pkkMHe88CJUSHK7mUCjbv0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/Hz3yK/dJMcacvsFca/pkkMHe88CJUSHK7mUCjbv0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FHz3yK%2FdJMcacvsFca%2FpkkMHe88CJUSHK7mUCjbv0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;586&quot; height=&quot;1344&quot; data-origin-width=&quot;586&quot; data-origin-height=&quot;1344&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #000000; text-align: start;&quot;&gt;실험을 위해 VGG-19, VGG-style Plain Network, ResNet을 사용하였다고 한다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;span style=&quot;background-color: #ffffff; text-align: start;&quot;&gt;ResNet은 위에 설명한대로 Residual block으로 구성되었고, &lt;/span&gt;&lt;span style=&quot;background-color: #ffffff; text-align: start;&quot;&gt;channel의 개수가 증가하면, Identity mapping에 Zero-padding이나 1 x 1 Convolution을 적용해 channel의 개수를 늘렸다고 한다. &lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #000000; text-align: start;&quot;&gt;두가지 경우 모두 feature map size를 줄이기 위해서 stride=2를 적용하여 downsampling한다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h2 style=&quot;text-align: left;&quot; data-ke-size=&quot;size26&quot;&gt;&lt;b&gt;Experiments&lt;/b&gt;&lt;/h2&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;Plain Network vs ResNet&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;404&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/nkTGI/dJMcaduhHzN/BPrL0oCPFkgj3XrkLm1Un0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/nkTGI/dJMcaduhHzN/BPrL0oCPFkgj3XrkLm1Un0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/nkTGI/dJMcaduhHzN/BPrL0oCPFkgj3XrkLm1Un0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FnkTGI%2FdJMcaduhHzN%2FBPrL0oCPFkgj3XrkLm1Un0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;404&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;404&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;- 그림에서 보이다시피, Plain Network는 degradation problem이 발생하는 반면, ResNet에서는 발생하지 않았다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;- ResNet-34의 top-1 error는 25.03%로서, Plain-34보다 3.5%가량 낮았다. 이는 Residual Learning의 효과를 입증한다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;- 18개의 레이어에서는 두 모델 모두 잘 수렴하였지만, ResNet이 수렴속도가 더 빨랐다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;Identity vs Projection shortcuts&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;위에서 Channel의 개수가 늘어나면(=차원이 증가할 때)&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;zero-padding이나 1 x 1 Convolution으로 채널의 개수를 늘릴 수 있다고 하였다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;-&amp;gt; 이를 위해서 3가지의 모델을 실험하였다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&amp;nbsp; ( A ) Channel의 개수를 늘릴 때만 zero-padding shortcut 사용 // 그 이외는 identity&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&amp;nbsp; ( B ) Channel의 개수를 늘릴 때만 1 x 1 Conv shortcut 사용 // 그 이외는 identity&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&amp;nbsp; ( C ) 모든 shortcut을 1 x 1 Conv로 사용&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;738&quot; data-origin-height=&quot;648&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/FhIjT/dJMcafZYFl9/k98RTCAPwYnlNN8PCeEnh0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/FhIjT/dJMcafZYFl9/k98RTCAPwYnlNN8PCeEnh0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/FhIjT/dJMcafZYFl9/k98RTCAPwYnlNN8PCeEnh0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FFhIjT%2FdJMcafZYFl9%2Fk98RTCAPwYnlNN8PCeEnh0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;368&quot; height=&quot;323&quot; data-origin-width=&quot;738&quot; data-origin-height=&quot;648&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;실험 결과, C가 성능이 가장 우수하였다.&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;*** 결론적으로, projection shortcut이 필수는 아니며, degradation 해결에는 identity shortcut만으로 충분함&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;Deeper Bottleneck Architecture&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;문제&lt;/b&gt;: 100층 이상 가려면 연산량이 폭증함&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; &lt;u&gt;더 깊은 네트워크를 만들고 싶어서, 기존 블록을 &lt;span style=&quot;color: #ee2323;&quot;&gt;&lt;b&gt;bottleneck&lt;/b&gt;&lt;/span&gt; 구조로 변경함&lt;/u&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;각 레이어의 역할을 보자면,&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 첫 1&amp;times;1 &amp;rarr; 채널 수 줄임 (compression)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 3&amp;times;3 &amp;rarr; 핵심 연산 수행&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 마지막 1&amp;times;1 &amp;rarr; 채널 수 복원&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;rarr; 즉, &lt;span style=&quot;background-color: #f6e199;&quot;&gt;비싼 3x3 연산을 작은 차원에서 수행&lt;/span&gt;&lt;/p&gt;
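&lt;p data-ke-size=&quot;size16&quot;&gt;bottleneck block을 PyTorch로 옮기면 대략 다음과 같다. (간단한 스케치이며, 채널 수가 바뀌거나 stride가 있을 때 shortcut에 1x1 projection을 쓰는 옵션 B 방식을 가정)&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import torch
import torch.nn as nn

class Bottleneck(nn.Module):
    def __init__(self, c_in, c_mid, c_out, stride=1):
        super().__init__()
        self.f = nn.Sequential(
            nn.Conv2d(c_in, c_mid, 1, bias=False),    # 1x1: 채널 축소
            nn.BatchNorm2d(c_mid),
            nn.ReLU(inplace=True),
            nn.Conv2d(c_mid, c_mid, 3, stride=stride, padding=1, bias=False),  # 3x3: 핵심 연산
            nn.BatchNorm2d(c_mid),
            nn.ReLU(inplace=True),
            nn.Conv2d(c_mid, c_out, 1, bias=False),   # 1x1: 채널 복원
            nn.BatchNorm2d(c_out),
        )
        # shape이 같으면 identity shortcut, 다르면 1x1 projection (옵션 B)
        if stride == 1 and c_in == c_out:
            self.shortcut = nn.Identity()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride=stride, bias=False),
                nn.BatchNorm2d(c_out))
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.f(x) + self.shortcut(x))

x = torch.randn(1, 256, 56, 56)
print(Bottleneck(256, 64, 256)(x).shape)  # torch.Size([1, 256, 56, 56])&lt;/code&gt;&lt;/pre&gt;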
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1064&quot; data-origin-height=&quot;400&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cKQu2l/dJMcagYTxz1/gIkqynx74lIGq0WKl4WCj0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cKQu2l/dJMcagYTxz1/gIkqynx74lIGq0WKl4WCj0/img.png&quot; data-alt=&quot;Left : ResNet-34 Residual Block&amp;nbsp;// Right : ResNet 50 / 101 / 152&amp;nbsp;Residual Block&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cKQu2l/dJMcagYTxz1/gIkqynx74lIGq0WKl4WCj0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcKQu2l%2FdJMcagYTxz1%2FgIkqynx74lIGq0WKl4WCj0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1064&quot; height=&quot;400&quot; data-origin-width=&quot;1064&quot; data-origin-height=&quot;400&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;Left : ResNet-34 Residual Block&amp;nbsp;// Right : ResNet 50 / 101 / 152&amp;nbsp;Residual Block&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot;&gt;왼쪽 그림은 ResNet-34에 사용되는 Residual Block이고, 오른쪽은 ResNet-50, ResNet-101, ResNet-152에 사용되는 Residual Block이다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot;&gt;*** identity shortcut이 중요한 이유:&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot;&gt;bottleneck 구조에서는 입력과 출력의 차원이 크다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot;&gt;그래서 만약 shortcut을 projection(1x1 conv)으로 바꾸면 계산량이 증가하고, 모델 크기가 2배 가까이 증가한다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot;&gt;-&amp;gt; 그래서 identity shortcut이 훨씬 효율적이다!&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;574&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/9MnDp/dJMcaaEpfxl/m7kE942CAfCbSdyQyP9Wy0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/9MnDp/dJMcaaEpfxl/m7kE942CAfCbSdyQyP9Wy0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/9MnDp/dJMcaaEpfxl/m7kE942CAfCbSdyQyP9Wy0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F9MnDp%2FdJMcaaEpfxl%2Fm7kE942CAfCbSdyQyP9Wy0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;574&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;574&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;ResNet-50을 구성하기 위해 기존 2개의 레이어를 갖는 Residual Block을 3개의 레이어를 갖는 Residual Block으로 교체하였다고 한다. 이 때, Channel을 늘리기 위해서 옵션 B를 사용하였다고 한다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;ResNet-101, ResNet-152를 구성하기 위해 3개의 레이어를 갖는 Residual Block을 사용하였다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000; background-color: #f6e199;&quot;&gt;레이어가 상당히 깊음에도 불구하고, VGGNet보다 파라미터 수가 적었다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;Result&lt;/span&gt;&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignLeft&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1066&quot; data-origin-height=&quot;1188&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/Mkhgz/dJMcadHPOuH/jzVki0UZyQKQMkG4vIcOXK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/Mkhgz/dJMcadHPOuH/jzVki0UZyQKQMkG4vIcOXK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/Mkhgz/dJMcadHPOuH/jzVki0UZyQKQMkG4vIcOXK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMkhgz%2FdJMcadHPOuH%2FjzVki0UZyQKQMkG4vIcOXK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;441&quot; height=&quot;491&quot; data-origin-width=&quot;1066&quot; data-origin-height=&quot;1188&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;위 그림은 Single Model로 평가하였을 때 top-1 error와 top-5 error를 보여준다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;아래 그림은 Ensemble Model의 결과이다. 주목할만한 점은 &lt;span style=&quot;color: #000000;&quot;&gt;ResNet-152 Single Model의 error가 기존의 ensemble model들의 결과를 모두 제쳤다는 것이다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;Ensemble Model을 위해 6개의 다른 레이어 깊이를 갖는 모델들을 사용하였다고 한다.&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot;background-color: #ffffff; color: #555555; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;text-align: left;&quot; data-ke-size=&quot;size23&quot;&gt;CIFAR-10 and Analysis&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;ImageNet 뿐만 아니라 CIFAR-10 Dataset에 대해서도 실험을 진행하였다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;점점 layer를 추가하여 실험을 진행한 결과, layer가 깊어질수록 더 좋은 성능을 보였다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;하지만 문제가 하나 있었는데, 1202 layer의 모델이 110 layer 모델보다 성능이 좋지 않았다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;*이는 overfitting 때문이었다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h2 style=&quot;text-align: left;&quot; data-ke-size=&quot;size26&quot;&gt;&lt;b&gt;(+)&lt;/b&gt;&lt;b&gt;&lt;/b&gt;&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Residual Learning이란 한마디로, 기존의 전체 함수 H(x)를 직접 학습하는 방식이 아닌, 입력 x와 목표 출력 H(x) 간의&amp;nbsp;&lt;b&gt;잔차(residual)&lt;/b&gt;에 해당하는 함수&amp;nbsp;&lt;b&gt;F(x) = H(x) - x&lt;/b&gt;를 학습하는 방식이다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;논문에서는 residual learning 구현을 위해&amp;nbsp;&lt;b&gt;shortcut connection&lt;/b&gt;을 제안하였다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;b&gt;shortcut&lt;/b&gt;&amp;nbsp;connection&lt;/b&gt;이란,&amp;nbsp;입력(x)을 레이어들을 몇 개 건너뛰어 전달하는 것이다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 구조 덕분에 네트워크의 출력이&amp;nbsp;&lt;b&gt;y = F(x) + x&lt;/b&gt; 형태가 되었다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;우리가 만들고 싶은 것은 &lt;b&gt;H(x)&lt;/b&gt;인데, 구조가 &lt;b&gt;y = F(x) + x&lt;/b&gt;이므로,&amp;nbsp;&lt;b&gt;H(x) = F(x) + x&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; 즉 &lt;b&gt;F(x) = H(x) - x&lt;/b&gt;가 된다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;그래서 conv block이 자동으로 &lt;b&gt;잔차(residual)&lt;/b&gt;을 학습하게 된다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이때 shortcut connection을 통해 전달된 입력 x와 conv layer에서 계산된 F(x)가 더해져서&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;최종 출력인 &lt;b&gt;H(x) = F(x) + x을&lt;/b&gt; 만들게 된다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;따라서 네트워크는 전체 함수 H(x)를 직접 근사하는 대신 입력 대비 변화량 F(x)를 학습하게 되며,&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이는 identity mapping 근처에서의 최적화를 훨씬 쉽게 만든다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;i&gt;ResNet은 Residual Learning을 실제로 구현하기 위해 두 가지 residual block 구조를 사용한다.&lt;/i&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;1. &lt;b&gt;일반 Residual block&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 구성: 3x3 Conv -&amp;gt; ReLU -&amp;gt; 3x3 Conv&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 입력과 출력의 shape이 같아야 identity shortcut을 사용할 수 있음&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 구조가 단순하여 shallow 네트워크에 적합&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;2. &lt;b&gt;Bottleneck block&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 구성: 1x1 Conv (축소) -&amp;gt; 3x3 Conv -&amp;gt; 1x1 Conv (확장)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 계산량 절감과 효율적인 파라미터 사용을 위해 설계됨&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 이때 입력과 출력의 차원이 증가하면(채널 수가 바뀌면) &lt;b&gt;projection shortcut&lt;/b&gt;을 사용하여 채널 수를 맞춤&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 깊은 네트워크에서도 연산량을 늘리지 않고 Residual Learning을 효율적으로 적용할 수 있도록 도와주는 구조적 최적화&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1078&quot; data-origin-height=&quot;386&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/nM4df/dJMcahp1pGp/kAFNKoSGT2TukjflQEFZJ1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/nM4df/dJMcahp1pGp/kAFNKoSGT2TukjflQEFZJ1/img.png&quot; data-alt=&quot;(왼) 일반 Residual Block / (오) Bottleneck Block&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/nM4df/dJMcahp1pGp/kAFNKoSGT2TukjflQEFZJ1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FnM4df%2FdJMcahp1pGp%2FkAFNKoSGT2TukjflQEFZJ1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;606&quot; height=&quot;217&quot; data-origin-width=&quot;1078&quot; data-origin-height=&quot;386&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;(왼) 일반 Residual Block / (오) Bottleneck Block&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;즉, ResNet이 성능이 좋아진 이유는,&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;1. &lt;b&gt;깊은 네트워크를 실제로 학습 가능하게 만들었기 때문&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 네트워크는 깊을수록 표현력이 좋아짐&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 하지만 문제는, &lt;u&gt;깊게 쌓으면 training error가 증가 (degradation problem)&lt;/u&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 즉, 표현력은 충분하지만 최적화가 실패함&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; ResNet은 이 최적화 문제를 해결했음&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;2. &lt;b&gt;Residual Learning이 최적화를 쉽게 만들었기 때문&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 기존 CNN: $H(x)$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- ResNet: $H(x) = x + F(x)$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 이는 '함수 공간을 identity 기준으로 재구성한 것(&lt;b&gt;reparameterization&lt;/b&gt;)'&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 즉, &lt;u&gt;identity 근처에서 탐색하도록 함&lt;/u&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 입력에 작은 수정을 할 수 있도록 구성하였으므로, &lt;u&gt;residual 형태가 더 최적화하기 쉬움&lt;/u&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;*** 즉, H(x)를 직접 근사하는 것보다, x와의 차이 F(x)를 학습하는 편이 최적화하기 쉬움&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;논문의 핵심 논리는 '&lt;b&gt;깊은 모델은 얕은 모델을 포함할 수 있어야 한다.&lt;/b&gt;'&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;추가된 레이어가 identity라면 성능이 나빠질 일은 없어야 함.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;하지만 plain net은 이 identity를 학습하기 어려웠음. -&amp;gt; &lt;u&gt;ResNet을 통해 쉽게 identity를 찾을 수 있음&lt;/u&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;결론적으로, ResNet을 통해 기존보다 더 깊은 층 구현 성공&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;당시 CNN은 20~30층이 한계였으나, ResNet은 152-layer 모델 학습에 성공!&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #333333; text-align: start;&quot;&gt;결과적으로 &lt;u&gt;신경망 깊이의 한계를 1000개 레이어 이상으로 확장했다&lt;/u&gt;는 데에 의의가 있다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #333333; text-align: start;&quot;&gt;(+ 추가 정보)&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #ffffff; color: #333333; text-align: start;&quot;&gt;이 논문의 방법이 효과적인 이유는 propagation에 도움을 주었기 때문이라고 분석하고 있다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;일반적인 deep network에서는 gradient가 모든 layer를 거쳐 전달되며,&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;layer가 깊어질수록 gradient가 반복적으로 곱해지면서 점점 작아지는 vanishing gradient 문제가 발생할 수 있다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;ResNet block에서는 shortcut connection을 통해 입력 x가 직접 전달되며, 출력은 y = F(x) + x 형태가 된다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 구조에서 역전파 시 gradient는 &amp;part;F(x)/&amp;part;x + 1 형태로 전달되는데, shortcut 경로에서 전달되는 항이 1이기 때문에&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;conv layer를 거치는 경로의 gradient가 작아지더라도 gradient가 완전히 사라지지 않고 안정적으로 전달될 수 있다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;-&amp;gt; 이러한 특성 덕분에 ResNet은 매우 깊은 네트워크에서도 효율적으로 학습할 수 있었다.&lt;/p&gt;
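&lt;p data-ke-size=&quot;size16&quot;&gt;이 성질은 autograd로 간단히 확인해 볼 수 있다. (conv 경로 대신 gradient가 아주 작은 함수를 쓴 예시용 가정)&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import torch

x = torch.tensor(2.0, requires_grad=True)
f = 0.001 * x ** 2  # F(x): gradient가 아주 작은 경로를 흉내
y = f + x           # y = F(x) + x
y.backward()
print(x.grad)       # dF/dx + 1 = 0.004 + 1 = 1.004 -&amp;gt; +1 항 덕분에 사라지지 않음&lt;/code&gt;&lt;/pre&gt;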
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style3&quot;&gt;참고&lt;br /&gt;https://imlim0813.tistory.com/34&lt;br /&gt;https://deep-math.tistory.com/18&lt;br /&gt;https://cat-b0.tistory.com/116&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>AI/논문 리뷰</category>
      <author>우연입니다</author>
      <guid isPermaLink="true">https://orchidbyw1.tistory.com/5</guid>
      <comments>https://orchidbyw1.tistory.com/5#entry5comment</comments>
      <pubDate>Sun, 1 Mar 2026 02:00:24 +0900</pubDate>
    </item>
    <item>
      <title>UTM 우분투 용량 확장</title>
      <link>https://orchidbyw1.tistory.com/4</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;리눅스 커널을 컴파일하려면 최소 40GB가 필요하다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;그래서 처음 용량보다 확장해야 하는 일이 생겨서 그 과정에 대해 기록하려고 한다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #8a3db6;&quot;&gt;#1. 가상머신 자체 용량 늘리기&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;* 가상머신은 꺼져 있는 상태여야 한다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;내 기기에서 실행한다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;1. (없으면) qemu 설치&lt;/b&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1759253385418&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;brew install qemu&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;2. 용량 확장&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #dddddd; color: #212529; text-align: start;&quot;&gt;~/Library/Containers/com.utmapp.UTM/Data/Documents&lt;/span&gt;&lt;span style=&quot;background-color: #ffffff; color: #333333; text-align: start;&quot;&gt;&amp;nbsp;로 이동한다.&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1152&quot; data-origin-height=&quot;110&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/9UJbf/btsQX08QYQB/0FeDg6HRhjNkQ4Tfb9WrK0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/9UJbf/btsQX08QYQB/0FeDg6HRhjNkQ4Tfb9WrK0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/9UJbf/btsQX08QYQB/0FeDg6HRhjNkQ4Tfb9WrK0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F9UJbf%2FbtsQX08QYQB%2F0FeDg6HRhjNkQ4Tfb9WrK0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1152&quot; height=&quot;110&quot; data-origin-width=&quot;1152&quot; data-origin-height=&quot;110&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;용량을 늘리고 싶은 가상머신으로 이동한다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이동하면 Data 폴더가 나오고, 거기로 이동하면 .qcow2 확장자를 가진 파일이 나온다.&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1264&quot; data-origin-height=&quot;172&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bBCOV6/btsQWS4OYmP/HLyPluzbPTAQ1n56LXG5H1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bBCOV6/btsQWS4OYmP/HLyPluzbPTAQ1n56LXG5H1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bBCOV6/btsQWS4OYmP/HLyPluzbPTAQ1n56LXG5H1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbBCOV6%2FbtsQWS4OYmP%2FHLyPluzbPTAQ1n56LXG5H1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1264&quot; height=&quot;172&quot; data-origin-width=&quot;1264&quot; data-origin-height=&quot;172&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;용량을 늘리기 위해 다음 명령어를 입력한다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;background-color: #dddddd; color: #212529; text-align: start;&quot;&gt;qemu-img resize (파일 이름) (변경할 용량)&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1759253483516&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;qemu-img resize ~/Library/Containers/com.utmapp.UTM/Data/Documents/&quot;Ubuntu 20.04 for ARM.utm&quot;/Data/576799F2-EA9C-4B5C-AA7E-691CF8ED4735.qcow2 50G # 50기가로 확장했음&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;3. Ubuntu VM 안에서 확인&lt;/b&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1759253558188&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;lsblk&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;vda가 50GB까지 늘어났지만, 파티션 확장이 되지 않은 상태.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;내가 확장하고 싶은 것은 vda3임. &lt;i&gt;-&amp;gt; 자신의 환경에 맞는 파티션으로 바꿔야 함&lt;/i&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #8a3db6;&quot;&gt;#2. 파티션 확장&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span&gt;1. growpart&lt;/span&gt; 사용&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;먼저 패키지 설치&lt;/p&gt;
&lt;pre id=&quot;code_1759253627487&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;sudo apt update
sudo apt install cloud-guest-utils
sudo growpart /dev/vda 3&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;rarr; &lt;span&gt;CHANGED: partition 3&lt;/span&gt; 메시지 출력 &amp;rarr; &lt;span&gt;vda3&lt;/span&gt;가 디스크 끝까지 확장됨&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;파티션이 늘어났다.&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1340&quot; data-origin-height=&quot;468&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/nM86k/btsQVX6ApYV/7IHO2Kr6I65EvMiwW41Ln1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/nM86k/btsQVX6ApYV/7IHO2Kr6I65EvMiwW41Ln1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/nM86k/btsQVX6ApYV/7IHO2Kr6I65EvMiwW41Ln1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FnM86k%2FbtsQVX6ApYV%2F7IHO2Kr6I65EvMiwW41Ln1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1340&quot; height=&quot;468&quot; data-origin-width=&quot;1340&quot; data-origin-height=&quot;468&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;하지만 아직 용량에는 반영되지 않은 상태.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;* &lt;span style=&quot;background-color: #dddddd;&quot;&gt;df -h&lt;/span&gt;로 확인 가능&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 style=&quot;color: #000000; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #8a3db6;&quot;&gt;#3. 용량 확장&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;1. LVM 물리 볼륨 크기 갱신&lt;/b&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1759253885274&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;sudo pvresize /dev/vda3&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;2. 논리 볼륨(LV) 확장&lt;/b&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1759253977794&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# sudo lvextend -L+{늘리고싶은용량}G /dev/mapper/ubuntu--vg-ubuntu--lv
sudo lvextend -L+20G /dev/mapper/ubuntu--vg-ubuntu--lv&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;20기가를 추가로 할당해주었다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;3. 파일시스템(ext4) 확장&lt;/b&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1759254004029&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;sudo resize2fs /dev/mapper/ubuntu--vg-ubuntu--lv&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;4. Final check&lt;/b&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1759254016045&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;df -h&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1406&quot; data-origin-height=&quot;584&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/wI7UE/btsQWTiplxN/rw3ctk6EAk7ID2ChtnAgaK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/wI7UE/btsQWTiplxN/rw3ctk6EAk7ID2ChtnAgaK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/wI7UE/btsQWTiplxN/rw3ctk6EAk7ID2ChtnAgaK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FwI7UE%2FbtsQWTiplxN%2Frw3ctk6EAk7ID2ChtnAgaK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1406&quot; height=&quot;584&quot; data-origin-width=&quot;1406&quot; data-origin-height=&quot;584&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;You can see the capacity has grown to 43 GB.&lt;/p&gt;</description>
      <category>개발log/Ubuntu</category>
      <author>우연입니다</author>
      <guid isPermaLink="true">https://orchidbyw1.tistory.com/4</guid>
      <comments>https://orchidbyw1.tistory.com/4#entry4comment</comments>
      <pubDate>Wed, 1 Oct 2025 02:43:22 +0900</pubDate>
    </item>
    <item>
      <title>Nginx로 HTTPS 설정하기 (+lego)</title>
      <link>https://orchidbyw1.tistory.com/3</link>
      <description>&lt;h3 data-ke-size=&quot;size23&quot;&gt;HTTPS&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;HTTPS stands for HyperText Transfer Protocol Secure, the secure version of the web protocol HTTP.&lt;br /&gt;Because HTTP exchanges plain text, anyone sitting between the server and the client can intercept the data as resources are exchanged. The HTTPS protocol solves this problem.&lt;br /&gt;It encrypts traffic using public-key cryptography.&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;SSL &amp;amp; TLS&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;SSL (Secure Sockets Layer) and TLS (Transport Layer Security) are cryptographic protocols used to secure network communication.&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;The HTTPS communication flow&lt;/h3&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; start=&quot;0&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;A CA (Certificate Authority) is a public-key repository; browsers already ship with the public keys of the CA companies!&lt;/li&gt;
&lt;li&gt;&lt;b&gt;The company generates the server's public/private key pair&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;It requests a certificate from the CA&lt;/b&gt;&lt;br /&gt;The CA builds a certificate containing the server's information, signs it with the CA's private key, and issues it to the server.&lt;br /&gt;-&amp;gt; This certificate is what we call an &lt;b&gt;SSL&lt;/b&gt; or &lt;b&gt;TLS&lt;/b&gt; certificate&lt;/li&gt;
&lt;li&gt;&lt;b&gt;The client (browser) connects to the server (HTTPS)&lt;/b&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;The client attempts to connect to &lt;a href=&quot;https://example.com&quot;&gt;https://example.com&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;The server sends its certificate (which carries the CA's signature) to the browser&lt;/li&gt;
&lt;li&gt;To verify the certificate's signature, the browser&lt;br /&gt;&amp;bull; reads the issuing CA's name from the certificate,&lt;br /&gt;&amp;bull; decrypts the signature with that CA's public key,&lt;br /&gt;&amp;bull; and confirms the certificate contents were not forged (see the command sketch after this list)&lt;/li&gt;
&lt;li&gt;It extracts the server's public key from the certificate&lt;/li&gt;
&lt;/ol&gt;
&lt;/li&gt;
&lt;/ol&gt;
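&lt;p data-ke-size=&quot;size16&quot;&gt;You can observe the certificate-presentation and verification steps yourself with the standard openssl CLI; a minimal sketch, with example.com standing in for any HTTPS host:&lt;/p&gt;
&lt;pre class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Print the certificate chain the server presents during the handshake
openssl s_client -connect example.com:443 -servername example.com -showcerts &lt;/dev/null

# Show the certificate's subject (the server) and issuer (the CA)
openssl s_client -connect example.com:443 -servername example.com &lt;/dev/null 2&gt;/dev/null \
  | openssl x509 -noout -subject -issuer&lt;/code&gt;&lt;/pre&gt;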
&lt;h3 data-ke-size=&quot;size23&quot;&gt;Let's Encrypt&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;A free, automated, open certificate authority (CA) that distributes SSL certificates at no cost.&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;Certificate issuance tools (certbot, lego, etc.)&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Free, open-source tools that help you obtain and renew SSL/TLS certificates from Let&amp;rsquo;s Encrypt so you can enable HTTPS.&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;Nginx&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;A web server that serves static content,&lt;br /&gt;or a reverse proxy (a front man) that forwards client requests to internal servers and relays the results back to the client.&lt;/p&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;Setting up HTTPS with Nginx + lego&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;The steps are as follows.&lt;/p&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;Install nginx&lt;/li&gt;
&lt;li&gt;Install lego and issue an SSL certificate&lt;/li&gt;
&lt;li&gt;Update the nginx configuration files&lt;/li&gt;
&lt;/ol&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Installing Nginx&lt;/blockquote&gt;
&lt;pre class=&quot;bash&quot; data-ke-language=&quot;bash&quot;&gt;&lt;code&gt;# Install Nginx
$ sudo apt install nginx

# Start Nginx and enable it at boot
$ sudo systemctl start nginx
$ sudo systemctl enable nginx&lt;/code&gt;&lt;/pre&gt;
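&lt;p data-ke-size=&quot;size16&quot;&gt;A quick sanity check that nginx is actually answering on port 80 (the default site should respond):&lt;/p&gt;
&lt;pre class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Expect an HTTP 200 and the default nginx welcome page headers
$ curl -I http://localhost&lt;/code&gt;&lt;/pre&gt;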
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Installing lego&lt;/blockquote&gt;
&lt;pre id=&quot;code_1755956590230&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Download the latest lego client release
$ cd /tmp
$ curl -Ls https://api.github.com/repos/xenolf/lego/releases/latest | grep browser_download_url | grep linux_amd64 | cut -d '&quot;' -f 4 | wget -i -

# Check the name of the downloaded archive
$ ls lego_*_linux_amd64.tar.gz

# Extract the archive (use the filename from the previous step)
$ tar xf lego_v1.2.1_linux_amd64.tar.gz

# Move the extracted lego binary to /usr/local/bin
$ sudo mv lego /usr/local/bin/lego

# Verify the installation
$ lego --version&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Issuing the SSL certificate&lt;/blockquote&gt;
&lt;pre id=&quot;code_1755957686092&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;$ sudo mkdir -p /etc/lego

# Replace the email and domain below with your own
$ sudo lego --email=&quot;you@example.com&quot; \
    --domains=&quot;yourdomain.com&quot; --http \
    --path=&quot;/etc/lego&quot; \
    run&lt;/code&gt;&lt;/pre&gt;
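&lt;p data-ke-size=&quot;size16&quot;&gt;One caveat: with --http, lego spins up its own challenge server on port 80, so if nginx is already listening there the run will fail. Stop nginx around the issuance, the same way the renewal script below does:&lt;/p&gt;
&lt;pre class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;$ sudo systemctl stop nginx
# ... run the lego command above ...
$ sudo systemctl start nginx&lt;/code&gt;&lt;/pre&gt;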
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #666666; text-align: left;&quot;&gt;In the configuration file under &lt;b&gt;/etc/nginx/sites-available/&lt;/b&gt;, check that the following paths are correct:&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #666666; text-align: left;&quot;&gt;- ssl_certificate /etc/lego/certificates/cau-chunghaha.p-e.kr.crt;&lt;br /&gt;- ssl_certificate_key /etc/lego/certificates/cau-chunghaha.p-e.kr.key;&lt;/span&gt;&lt;/p&gt;
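&lt;p data-ke-size=&quot;size16&quot;&gt;You can list what lego actually wrote to confirm the filenames (they follow the issued domain):&lt;/p&gt;
&lt;pre class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# The .crt / .key filenames follow the issued domain
$ ls -l /etc/lego/certificates/&lt;/code&gt;&lt;/pre&gt;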
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Editing the NGINX configuration file&lt;/blockquote&gt;
&lt;pre id=&quot;code_1755958446196&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Edit the site configuration for your domain
$ sudo vim /etc/nginx/sites-available/{domain}&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1755958634750&quot; class=&quot;properties&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;bash&quot;&gt;&lt;code&gt;# /etc/nginx/sites-available/{domain}
server {
    listen 80;
    server_name {domain} www.{domain};

    return 301 https://$host$request_uri;
}

server {
    listen 443 ssl;
    listen [::]:443 ssl;
    server_name {domain} www.{domain};

    # Paths as issued by lego above (certbot would use /etc/letsencrypt/live/... instead)
    ssl_certificate /etc/lego/certificates/{domain}.crt;
    ssl_certificate_key /etc/lego/certificates/{domain}.key;

    location / {
        proxy_pass http://127.0.0.1:8000;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    }

    location /static/ {
        alias /path/to/static/files/;
    }

    location /media/ {
        alias /path/to/media/files/;
    }
}&lt;/code&gt;&lt;/pre&gt;
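&lt;p data-ke-size=&quot;size16&quot;&gt;Optionally, you can pin the accepted TLS versions inside the 443 server block; a common minimal hardening sketch (the directives are standard nginx, the values are a suggestion):&lt;/p&gt;
&lt;pre class=&quot;properties&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;bash&quot;&gt;&lt;code&gt;# Inside the listen 443 server block
ssl_protocols TLSv1.2 TLSv1.3;
ssl_prefer_server_ciphers on;&lt;/code&gt;&lt;/pre&gt;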
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1755958849823&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;$ sudo vim /etc/nginx/nginx.conf&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1755958861653&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# /etc/nginx/nginx.conf

# Find include /etc/nginx/sites-enabled/*;
# on the last line of the http block
http {
	include /etc/nginx/sites-enabled/{domain};
}&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Create a symbolic link:&lt;/p&gt;
&lt;pre id=&quot;code_1755958945404&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;$ sudo ln -s /etc/nginx/sites-available/{domain} /etc/nginx/sites-enabled/&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Restarting nginx&lt;/blockquote&gt;
&lt;pre id=&quot;code_1755959014413&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Test the configuration first, then restart
$ sudo nginx -t
$ sudo systemctl restart nginx
$ sudo systemctl status nginx&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Checking the domain certificate's expiration date&lt;/blockquote&gt;
&lt;pre id=&quot;code_1755959166564&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;$ openssl s_client -connect {domain}:443 &lt;/dev/null 2&gt;/dev/null | openssl x509 -noout -dates&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;Automatic renewal&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;** Renewal requires stopping nginx and starting it again!&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Creating the renewal script&lt;/blockquote&gt;
&lt;pre id=&quot;code_1755959274389&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;$ sudo nano /usr/local/bin/renew-ssl.sh&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Paste in the following:&lt;/p&gt;
&lt;pre id=&quot;code_1755959300032&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;#!/bin/bash

# Stop nginx
systemctl stop nginx

# Attempt certificate renewal
/usr/local/bin/lego \
  --email=&quot;you@example.com&quot; \
  --domains=&quot;yourdomain.com&quot; \
  --http --path=&quot;/etc/lego&quot; renew

# Start nginx
systemctl start nginx&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Save and exit: Ctrl + X&lt;span&gt; &amp;rarr; &lt;/span&gt;Y&lt;span&gt; &amp;rarr; &lt;/span&gt;Enter&lt;/p&gt;
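&lt;p data-ke-size=&quot;size16&quot;&gt;Running this daily is safe because lego's renew subcommand skips re-issuance while the certificate is still far from expiry. If you want that threshold explicit, recent lego versions accept a --days flag (an assumption; verify with lego --help on your version):&lt;/p&gt;
&lt;pre class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# Renew only when fewer than 30 days remain (assumed flag; check lego --help)
/usr/local/bin/lego --email=&quot;you@example.com&quot; \
  --domains=&quot;yourdomain.com&quot; \
  --http --path=&quot;/etc/lego&quot; renew --days 30&lt;/code&gt;&lt;/pre&gt;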
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Make the file executable:&lt;/p&gt;
&lt;pre id=&quot;code_1755959973743&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;$ sudo chmod +x /usr/local/bin/renew-ssl.sh&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Registering the job with cron&lt;/blockquote&gt;
&lt;pre id=&quot;code_1755959511184&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;$ sudo crontab -e&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Add the following line at the bottom (attempts renewal every day at 3 AM):&lt;/p&gt;
&lt;pre id=&quot;code_1755959527398&quot; class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;0 3 * * * /usr/local/bin/renew-ssl.sh &amp;gt;&amp;gt; /var/log/ssl-renew.log 2&amp;gt;&amp;amp;1&lt;/code&gt;&lt;/pre&gt;
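&lt;p data-ke-size=&quot;size16&quot;&gt;To confirm the entry was registered:&lt;/p&gt;
&lt;pre class=&quot;bash&quot; data-ke-language=&quot;bash&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;# List root's crontab; the renewal line should appear
$ sudo crontab -l&lt;/code&gt;&lt;/pre&gt;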
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;Testing that it works&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;** This will restart nginx!&lt;/p&gt;
&lt;pre id=&quot;code_1755961702302&quot; class=&quot;bash&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;bash&quot;&gt;&lt;code&gt;$ sudo /usr/local/bin/renew-ssl.sh&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;References&lt;br /&gt;&lt;a href=&quot;https://html-jc.tistory.com/736&quot;&gt;https://html-jc.tistory.com/736&lt;/a&gt;&lt;br /&gt;&lt;a href=&quot;https://serverok.in/lego&quot;&gt;https://serverok.in/lego&lt;/a&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;a href=&quot;https://github.com/rangyu/TIL/blob/master/bitnami/%EB%B9%84%ED%8A%B8%EB%82%98%EB%AF%B8-%EC%9B%8C%EB%93%9C%ED%94%84%EB%A0%88%EC%8A%A4%EC%97%90-Letsencrypt-%EC%9D%B8%EC%A6%9D%EC%84%9C-%EC%84%A4%EC%B9%98%ED%95%98%EA%B8%B0.md&quot; target=&quot;_blank&quot; rel=&quot;noopener noreferrer&quot;&gt;https://github.com/rangyu/TIL/blob/master/bitnami/%EB%B9%84%ED%8A%B8%EB%82%98%EB%AF%B8-%EC%9B%8C%EB%93%9C%ED%94%84%EB%A0%88%EC%8A%A4%EC%97%90-Letsencrypt-%EC%9D%B8%EC%A6%9D%EC%84%9C-%EC%84%A4%EC%B9%98%ED%95%98%EA%B8%B0.md&lt;/a&gt;&lt;/p&gt;</description>
      <category>개발log/서버</category>
      <author>우연입니다</author>
      <guid isPermaLink="true">https://orchidbyw1.tistory.com/3</guid>
      <comments>https://orchidbyw1.tistory.com/3#entry3comment</comments>
      <pubDate>Sun, 24 Aug 2025 00:16:17 +0900</pubDate>
    </item>
  </channel>
</rss>