System info:
Apple M1 Pro
Monterey 12.1
As the number of epochs increases, the memory usage shown in Activity Monitor also increases rapidly and can even reach 100 GB, after which the computer restarts.
Profiling with the memory_profiler tool shows that the model does not consume that much memory.
Even if the model did consume a lot of memory, the result should be an out-of-memory error, not a computer restart.
Below is a small demo:
from tensorflow import keras as k
from keras.models import Sequential
from keras.layers import Dense,Activation
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from memory_profiler import profile
# Width of each synthetic input row.
input_dim = 500
# Number of hidden Dense layers handed to the model constructor.
model_layers = 10
class my_model(k.Model):
    """Stack of 32-unit ReLU Dense layers followed by a 2-unit Dense head.

    Note: despite its name, ``input_dim`` sets how many hidden Dense layers
    are created, not the width of the input features.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.input_dim = input_dim
        # One hidden layer per unit of ``input_dim``.
        self.denses = [Dense(32, activation='relu') for _ in range(input_dim)]
        self.l2 = Dense(2)

    def call(self, inputs):
        """Pass ``inputs`` through every hidden layer in order, then the head."""
        hidden = inputs
        for layer in self.denses:
            hidden = layer(hidden)
        return self.l2(hidden)
batch_all = 10000000
# As float64 this array alone needs batch_all * input_dim * 8 bytes = 40 GB
# (~37 GiB). Generating it as float32 halves the footprint and matches the
# default Keras compute dtype. It is still ~20 GB: lower ``batch_all`` or
# generate each batch on the fly if that exceeds the machine's RAM.
data = np.random.default_rng().random(
    (batch_all, input_dim), dtype=np.float32
)
# Binary class id (0 or 1) for every row of ``data``.
y = np.random.randint(2, size=(batch_all))
model = my_model(model_layers)
epochs = 10
def compute_loss(predict, ground_true):
    """Return the sigmoid cross-entropy between ``predict`` and the labels.

    ``ground_true`` holds class indices; it is one-hot encoded to depth 2
    along the last axis before being compared with the logits in ``predict``.
    """
    one_hot_labels = tf.one_hot(ground_true, depth=2, axis=-1)
    return tf.nn.sigmoid_cross_entropy_with_logits(
        logits=predict, labels=one_hot_labels
    )
# Rows consumed per training step.
batch_size = 2 * 4096
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
m = tf.keras.metrics.Accuracy()
from keras import backend as k
from tensorflow.python.framework import ops
import psutil
def train(batch_size):
    """Run one optimisation step on the batch that starts at the global ``num``.

    Reads the module-level ``data``, ``y``, ``model``, ``optimizer`` and the
    loop variable ``num``. It slices ``batch_size`` rows starting at ``num``,
    computes the loss under a gradient tape and applies the resulting
    gradients to the variables the tape watched.

    Args:
        batch_size: Number of rows to take from ``data`` and ``y``.
    """
    # Plain NumPy slicing does not need to be recorded by the tape.
    input_x = data[num:num + batch_size]
    labels = y[num:num + batch_size]

    with tf.GradientTape() as tp:
        predict = model(input_x)
        loss = compute_loss(predict, labels)

    # Differentiate after leaving the context so the tape does not also
    # record the gradient computation itself.
    variables = tp.watched_variables()
    grads = tp.gradient(loss, variables)
    # Without this update the computed gradients are discarded and the
    # model never learns.
    optimizer.apply_gradients(zip(grads, variables))
for item in tqdm(range(epochs)):
    for num in tqdm(range(0, batch_all, batch_size)):
        # train() reads the module-level ``num`` assigned by this loop.
        train(batch_size)
The contents of memory_profiler_buiild.log are shown below:
Line # Mem usage Increment Occurences Line Contents
45 4514.1562 MiB 4514.1562 MiB 1 @profile(precision=4,stream=open('../logs/keras_test/memory_profiler_buiild.log','w+'))
46 def train(batch_size):
48 4514.1562 MiB 0.0000 MiB 1 with tf.GradientTape() as tp:
50 4514.1562 MiB 0.0000 MiB 1 input_x = data[num:num+batch_size]
51 4514.1562 MiB 0.0000 MiB 1 labels = y[num:num+batch_size]
52 4514.1562 MiB 0.0000 MiB 1 predict = model(input_x)
54 4514.1562 MiB 0.0000 MiB 1 loss = compute_loss(predict,labels)
55 4514.1562 MiB 0.0000 MiB 1 vears = tp.watched_variables()
56 4514.1562 MiB 0.0000 MiB 1 grads = tp.gradient(loss,vears)
58 4514.1719 MiB 0.0156 MiB 1 optimizer.apply_gradients(zip(grads,vears))
The memory usage shown in Activity Monitor is 37 GB.
How can I solve this problem? If there is a memory leak, I expect the program to be killed, not the computer to restart!