.requires_grad,.detach(),torch.no_grad()

让模型参数的requires_grad=False:
 

import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    print(f"Tensor name: {name}")
    print("Requires gradient:", tensor.requires_grad)
    print("Is leaf:", tensor.is_leaf)
    print('\n')


class SelectiveGradientModel(nn.Module):
    def __init__(self):
        super(SelectiveGradientModel, self).__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        # let the parameters of layer2 not require gradients
        for param in self.layer2.parameters():
            param.requires_grad = False
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        check_tensor_properties(x, "x")  # requires_grad=False, is_leaf=True. just because it's the input tensor

        x_ = torch.relu(self.layer1(x))
        check_tensor_properties(x_, "x_")  # requires_grad=True, is_leaf=False

        y = torch.relu(self.layer2(x_))  # layer2's parameters do not require gradients
        check_tensor_properties(y, "y")  # requires_grad=True, is_leaf=False

        t = self.layer3(y)
        check_tensor_properties(t, "t")  # requires_grad=True, is_leaf=False

        return t


torch.manual_seed(2)
model = SelectiveGradientModel()

optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
check_tensor_properties(inputs, "inputs")  # requires_grad=False, is_leaf=True

targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

print("Layer1 gradients:")
for param in model.layer1.parameters():
    print(param.grad)  # not None

print("Layer2 gradients:")
for param in model.layer2.parameters():
    print(param.grad)  # None

print("Layer3 gradients:")
for param in model.layer3.parameters():
    print(param.grad)  # not None

detach:

import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    print(f"Tensor name: {name}")
    print("Requires gradient:", tensor.requires_grad)
    print("Is leaf:", tensor.is_leaf)
    print('\n')

class SelectiveGradientModel(nn.Module):
    def __init__(self):
        super(SelectiveGradientModel, self).__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer2 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        x = torch.relu(self.layer1(x))

        x = torch.relu(self.layer2(x))
        check_tensor_properties(x, "x")  # requires_grad=True, is_leaf=False

        z = x.detach()  # detach x from the computation graph
        check_tensor_properties(z, "z")  # requires_grad=False, is_leaf=True
        
        t = self.layer3(z)
        check_tensor_properties(t, "t")  # requires_grad=True, is_leaf=False

        return t


torch.manual_seed(2)
model = SelectiveGradientModel()

optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

print("Layer1 gradients:")
for param in model.layer1.parameters():
    print(param.grad)  # None

print("Layer2 gradients:")
for param in model.layer2.parameters():
    print(param.grad)  # None

print("Layer3 gradients:")
for param in model.layer3.parameters():
    print(param.grad)  # not None

torch.no_grad():

import torch
import torch.nn as nn
import torch.optim as optim


def check_tensor_properties(tensor, name):
    print(f"Tensor name: {name}")
    print("Requires gradient:", tensor.requires_grad)
    print("Is leaf:", tensor.is_leaf)
    print('\n')


class SelectiveGradientModel(nn.Module):
    def __init__(self):
        super(SelectiveGradientModel, self).__init__()
        self.layer1 = nn.Linear(2, 2)
        self.layer20 = nn.Linear(2, 2)
        self.layer21 = nn.Linear(2, 2)
        self.layer3 = nn.Linear(2, 2)

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        check_tensor_properties(x, "x")  # requires_grad=True, is_leaf=False

        with torch.no_grad():  # all the tensors created in this block will not require gradients and be leaf tensors
            y = torch.relu(self.layer20(x))
            check_tensor_properties(y, "y")  # requires_grad=False, is_leaf=True

            z = torch.relu(self.layer21(y))
            check_tensor_properties(z, "z")  # requires_grad=False, is_leaf=True

        t = self.layer3(z)
        check_tensor_properties(t, "t")  # requires_grad=True, is_leaf=False

        return t


torch.manual_seed(2)
model = SelectiveGradientModel()

optimizer = optim.SGD(model.parameters(), lr=0.1)
criterion = nn.MSELoss()

inputs = torch.randn(1, 2)
targets = torch.randn(1, 2)

outputs = model(inputs)
loss = criterion(outputs, targets)

loss.backward()

print("Layer1 gradients:")
for param in model.layer1.parameters():
    print(param.grad)  # None

print("Layer20 gradients:")
for param in model.layer20.parameters():
    print(param.grad)  # None

print("Layer21 gradients:")
for param in model.layer21.parameters():
    print(param.grad)  # None

print("Layer3 gradients:")
for param in model.layer3.parameters():
    print(param.grad)  # not None

相关推荐

最近更新

  1. docker php8.1+nginx base 镜像 dockerfile 配置

    2024-05-01 22:42:01       98 阅读
  2. Could not load dynamic library ‘cudart64_100.dll‘

    2024-05-01 22:42:01       106 阅读
  3. 在Django里面运行非项目文件

    2024-05-01 22:42:01       87 阅读
  4. Python语言-面向对象

    2024-05-01 22:42:01       96 阅读

热门阅读

  1. C/C++中的整数除法运算与汇编指令DIV和IDIV

    2024-05-01 22:42:01       35 阅读
  2. 如何看待AIGC技术

    2024-05-01 22:42:01       29 阅读
  3. Leetcode 590:N叉树的后序遍历

    2024-05-01 22:42:01       35 阅读
  4. Ubuntu 4G模块域名ping不通

    2024-05-01 22:42:01       29 阅读
  5. 一篇文章讲完 Gorm 入门所有使用

    2024-05-01 22:42:01       26 阅读
  6. UIButton中addTarget和addAction有什么区别

    2024-05-01 22:42:01       29 阅读
  7. 展开说说:Android动画之自定义动画

    2024-05-01 22:42:01       31 阅读