System info:
Apple M1 Pro
macOS Monterey 12.1
32 GB RAM
tensorflow-metal: 0.2.0
tensorflow-macos: 2.6.0
As the number of epochs increases, the memory usage shown in Activity Monitor also grows rapidly; it can reach as much as 100 GB, and then the computer restarts.
Profiling with memory_profiler shows that the model does not consume anywhere near that much memory.
Even if the model did consume that much memory, the result should be an out-of-memory failure, not a computer restart.
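To make the growth easier to track per epoch, something like the following psutil snippet could be dropped into the training loop (a minimal sketch; log_rss is just an illustrative helper, not part of the demo below):

import os
import psutil

def log_rss(tag):
    # resident set size of the current process, in GiB
    rss_gib = psutil.Process(os.getpid()).memory_info().rss / 1024 ** 3
    print(f"[{tag}] RSS = {rss_gib:.2f} GiB")

Calling log_rss(f"epoch {item}") at the end of each epoch would show whether the process resident size itself is growing, or whether the growth Activity Monitor reports is coming from somewhere else.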
Below is a small demo:
from tensorflow import keras as k
from keras.models import Sequential
from keras.layers import Dense, Activation
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from memory_profiler import profile

input_dim = 500      # number of input features
model_layers = 10    # number of hidden Dense layers

class my_model(k.Model):
    # note: the constructor argument is used as the number of Dense layers,
    # not the feature dimension (the model is built with model_layers = 10)
    def __init__(self, input_dim):
        super(my_model, self).__init__()
        self.input_dim = input_dim
        self.denses = [Dense(32, activation='relu') for item in range(input_dim)]
        self.l2 = Dense(2)

    def call(self, inputs):
        for i in range(self.input_dim):
            inputs = self.denses[i](inputs)
        inputs = self.l2(inputs)
        return inputs

batch_all = 10000000
data = np.random.random((batch_all, input_dim))
y = np.random.randint(2, size=(batch_all,))
model = my_model(model_layers)
epochs = 10

def compute_loss(predict, ground_true):
    loss = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=predict,
        labels=tf.one_hot(ground_true, depth=2, axis=-1))
    return loss

batch_size = 4096 * 2
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
m = tf.keras.metrics.Accuracy()

from keras import backend as K
from tensorflow.python.framework import ops
import psutil

@profile(precision=4, stream=open('../logs/keras_test/memory_profiler_buiild.log', 'w+'))
def train(batch_size):
    # num comes from the enclosing loop below
    with tf.GradientTape() as tp:
        input_x = data[num:num + batch_size]
        labels = y[num:num + batch_size]
        predict = model(input_x)
        loss = compute_loss(predict, labels)
    vears = tp.watched_variables()
    grads = tp.gradient(loss, vears)
    optimizer.apply_gradients(zip(grads, vears))

for item in tqdm(range(epochs)):
    for num in tqdm(range(0, batch_all, batch_size)):
        train(batch_size)
memory_profiler_buiild.log is shown below:
Line # Mem usage Increment Occurences Line Contents
============================================================
45 4514.1562 MiB 4514.1562 MiB 1 @profile(precision=4,stream=open('../logs/keras_test/memory_profiler_buiild.log','w+'))
46 def train(batch_size):
47
48 4514.1562 MiB 0.0000 MiB 1 with tf.GradientTape() as tp:
49
50 4514.1562 MiB 0.0000 MiB 1 input_x = data[num:num+batch_size]
51 4514.1562 MiB 0.0000 MiB 1 labels = y[num:num+batch_size]
52 4514.1562 MiB 0.0000 MiB 1 predict = model(input_x)
53
54 4514.1562 MiB 0.0000 MiB 1 loss = compute_loss(predict,labels)
55 4514.1562 MiB 0.0000 MiB 1 vears = tp.watched_variables()
56 4514.1562 MiB 0.0000 MiB 1 grads = tp.gradient(loss,vears)
57
58 4514.1719 MiB 0.0156 MiB 1 optimizer.apply_gradients(zip(grads,vears))
Meanwhile, Activity Monitor shows the same process using about 37 GB.
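To check whether the extra usage is coming from TensorFlow's own allocator on the GPU device rather than from the Python heap (which might explain why memory_profiler reports so little), I am considering adding something like the following to the loop. This is only a sketch, and I have not confirmed that tf.config.experimental.get_memory_info reports anything meaningful for the metal GPU device:

import tensorflow as tf

def log_tf_device_memory(device="GPU:0"):
    # current / peak bytes allocated by TensorFlow on the given device
    # (assumption: the metal plugin exposes this the same way CUDA devices do)
    info = tf.config.experimental.get_memory_info(device)
    print(f"{device}: current={info['current'] / 1024**3:.2f} GiB, "
          f"peak={info['peak'] / 1024**3:.2f} GiB")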
How can this problem be solved? When memory leaks, I would expect the process to be killed, not the whole computer to restart!
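As a stopgap, what I really want is for the process to fail before it takes the machine down. Something like the following is what I have in mind (a sketch only; I am not sure RLIMIT_DATA is enforced for every kind of allocation on macOS, and it may not cover memory allocated by the metal plugin):

import resource

def limit_memory(max_gib):
    # ask the OS to refuse allocations beyond max_gib so the process dies
    # with a MemoryError / gets killed instead of exhausting the machine
    max_bytes = int(max_gib * 1024 ** 3)
    resource.setrlimit(resource.RLIMIT_DATA, (max_bytes, max_bytes))

limit_memory(24)  # e.g. cap at 24 GiB on a 32 GB machine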