I want to replace mx.symbol.SoftmaxOutput with a weighted version that assigns each label a different weight according to its frequency in the whole dataset.
The original code looks like this:
cls_prob = …
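(For concreteness, "weights from label frequencies" usually means inverse-frequency weights. A minimal NumPy sketch, not from the question itself; labels and num_classes are hypothetical stand-ins for your dataset, and it assumes every class occurs at least once:)

import numpy as np

# hypothetical 1-D array of integer class labels for the whole dataset
labels = np.random.randint(0, 10, size=100000)
num_classes = 10

# count how often each class occurs, then weight each class by the
# inverse of its frequency (normalized so the average weight is ~1);
# rare classes end up with weights > 1
counts = np.bincount(labels, minlength=num_classes).astype(np.float64)
class_weights = counts.sum() / (num_classes * counts)
print(class_weights)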
I'm not sure a CustomOp is the best choice here, as it can be slow. Because SoftmaxOutput computes its gradient itself in the backward pass, it doesn't let you scale the loss the way you want. Doing it with the symbol API isn't too complicated, though. I've attached a toy example below; I hope it helps.
import mxnet as mx
import numpy as np
import logging

# learn the floor function from random numbers in [-1, -1 + num_classes]
n = 10000
batch_size = 128
num_classes = 10
x = (np.random.random((n,)) * num_classes) - 1
y = np.floor(x)
print(x[:2])
print(y[:2])

# define graph
data = mx.symbol.Variable('data')
label = mx.symbol.Variable('label')
class_weights = mx.symbol.Variable('class_weights')
fc = mx.sym.FullyConnected(data=data, num_hidden=num_classes)
fc = mx.sym.Activation(data=fc, act_type='relu')
proba = mx.sym.FullyConnected(data=fc, num_hidden=num_classes)
proba = mx.sym.softmax(proba)

# per-example cross entropy (-log of the probability picked for the true
# label), multiplied by that label's class weight
cross_entropy = -mx.sym.log(mx.sym.pick(proba, label)) \
    * mx.sym.pick(class_weights, label)

# mask the loss to zero when label is -1
mask = mx.sym.broadcast_not_equal(label, mx.sym.ones_like(label) * -1)
cross_entropy = cross_entropy * mask

# fit module; the class weights are fed in as an extra data input
class_weights = np.array([np.arange(1, 1 + num_classes)] * n)
data_iter = mx.io.NDArrayIter(data={'data': x, 'class_weights': class_weights},
                              label={'label': y},
                              batch_size=batch_size)
mod = mx.mod.Module(
    mx.sym.Group([mx.sym.MakeLoss(cross_entropy, name='ce_loss'),
                  mx.sym.BlockGrad(proba)]),
    data_names=[v.name for v in data_iter.provide_data],
    label_names=[v.name for v in data_iter.provide_label]
)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
mod.bind(data_shapes=data_iter.provide_data,
         label_shapes=data_iter.provide_label)
mod.init_params()
mod.fit(
    data_iter,
    num_epoch=200,
    optimizer=mx.optimizer.Adam(learning_rate=0.01, rescale_grad=1.0 / batch_size),
    batch_end_callback=mx.callback.Speedometer(batch_size, 200),
    eval_metric=mx.metric.Loss(name="loss", output_names=["ce_loss_output"]))

# show result; -1 labels are not predicted correctly as we did not compute their loss
probas = mod.predict(data_iter)[1].asnumpy()
print(list(zip(x, np.argmax(probas, axis=1))))
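Two design notes on the example: mx.sym.MakeLoss marks cross_entropy itself as the training objective, so gradients are derived from it directly instead of from a fused op like SoftmaxOutput, and mx.sym.BlockGrad exposes the softmax probabilities as a second output while preventing gradients from flowing through that branch. The np.arange weights here are only placeholders; in the question's setting they would be replaced by inverse-frequency weights like those sketched above, and since class_weights is an ordinary data input supplied through the NDArrayIter, they can be changed without rebuilding the graph.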