How to Convert TensorFlow 1.x Code to PyTorch (Using the Graph Attention Network (GAT) as an Example)
In an earlier post I walked through the official TensorFlow implementation of the Graph Attention Network. Since I am more familiar with PyTorch, I decided to rewrite it as a PyTorch version.
If you are not yet familiar with the Graph Attention Network, you may want to read the TensorFlow version first; the earlier walkthrough is here:
The converted code has been uploaded to GitHub at:
The official Graph Attention Network code uses TensorFlow 1.x and is available at:
Now let's get into the main content.
1. The typical TensorFlow 1.x modeling workflow:
Prepare the training data
Define the computation graph (including placeholders)
Define the main training function, the loss computation, and the optimizer
Define a Session; parameter initialization and the actual forward and backward passes all happen inside the Session (a minimal skeleton follows this list)
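To make these four steps concrete, here is a minimal TensorFlow 1.x sketch of the workflow (the toy regression data and variable names are illustrative and not part of the GAT code):

# A minimal TensorFlow 1.x skeleton of the four steps above; the toy regression
# data and variable names are made up for illustration, not part of the GAT code.
import numpy as np
import tensorflow as tf

x_data = np.random.rand(100, 8).astype(np.float32)     # 1. training data
y_data = np.random.rand(100, 1).astype(np.float32)

x_ph = tf.placeholder(tf.float32, shape=(None, 8))     # 2. computation graph with placeholders
y_ph = tf.placeholder(tf.float32, shape=(None, 1))
pred = tf.layers.dense(x_ph, 1)

loss = tf.reduce_mean(tf.square(pred - y_ph))          # 3. loss and optimizer
train_op = tf.train.AdamOptimizer(0.01).minimize(loss)

with tf.Session() as sess:                              # 4. initialization and the actual
    sess.run(tf.global_variables_initializer())         #    forward/backward passes run here
    _, loss_val = sess.run([train_op, loss],
                           feed_dict={x_ph: x_data, y_ph: y_data})
    print(loss_val)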
2. Converting the TensorFlow code to PyTorch
The rest of the data-processing code is identical; the main places that need to change are the following:
2.1 Loading the data
In TensorFlow the labels have to be one-hot encoded, whereas in PyTorch they do not. In load_data:
def load_data(dataset_str): # {'pubmed', 'citeseer', 'cora'}
    """Load data."""
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]
    # PyTorch labels do not need one-hot encoding
    my_labels = np.where(labels == 1)[1]
    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y)+500)

    train_my_labels_mask = sample_mask(idx_train, my_labels.shape[0])
    val_my_labels_mask = sample_mask(idx_val, my_labels.shape[0])
    test_my_labels_mask = sample_mask(idx_test, my_labels.shape[0])
    train_my_labels = my_labels[train_my_labels_mask]
    val_my_labels = my_labels[val_my_labels_mask]
    test_my_labels = my_labels[test_my_labels_mask]

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    print(adj.shape)
    print(features.shape)

    data_dict = {
        'adj': adj,
        'features': features,
        'y_train': y_train,
        'y_val': y_val,
        'y_test': y_test,
        'train_mask': train_mask,
        'val_mask': val_mask,
        'test_mask': test_mask,
        'train_my_labels': train_my_labels,
        'val_my_labels': val_my_labels,
        'test_my_labels': test_my_labels,
        'my_labels': my_labels
    }
    return data_dict
We use np.where() to pull out, for each one-hot row, the index of the value 1 (that is, the class label), and then split these indices into training, validation, and test labels.
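As a tiny illustration (the 3-class one-hot rows below are made up), np.where gives exactly the integer class indices that PyTorch losses such as nn.CrossEntropyLoss expect:

import numpy as np

labels = np.array([[1, 0, 0],
                   [0, 0, 1],
                   [0, 1, 0]])        # one-hot rows, as used by the TensorFlow code
class_idx = np.where(labels == 1)[1]  # column index of the 1 in each row
print(class_idx)                      # [0 2 1] -- integer labels for PyTorch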
As an aside, when a function has many values to return, it is cleaner to pack them into a dictionary and return that dictionary; this is also more in keeping with common Python style.
2.2 Building the attention layer
In TensorFlow:
import tensorflow as tf

conv1d = tf.layers.conv1d

def attn_head(seq, out_sz, bias_mat, activation, in_drop=0.0, coef_drop=0.0, residual=False):
    with tf.name_scope('my_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)
        logits = f_1 + tf.transpose(f_2, [0, 2, 1])
        coefs = tf.nn.softmax(tf.nn.leaky_relu(logits) + bias_mat)

        if coef_drop != 0.0:
            coefs = tf.nn.dropout(coefs, 1.0 - coef_drop)
        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        vals = tf.matmul(coefs, seq_fts)
        ret = tf.contrib.layers.bias_add(vals)

        # residual connection
        if residual:
            if seq.shape[-1] != ret.shape[-1]:
                ret = ret + conv1d(seq, ret.shape[-1], 1)  # activation
            else:
                ret = ret + seq

        return activation(ret)  # activation
In TensorFlow you can simply call the relevant APIs to do the computation, but in PyTorch, whether you are defining your own layer or a whole model, you generally have to build it first and then use it. The rewritten code is as follows:
import torch
import torch.nn as nn


class Attn_head(nn.Module):
    def __init__(self,
                 in_channel,
                 out_sz,
                 bias_mat,
                 in_drop=0.0,
                 coef_drop=0.0,
                 activation=None,
                 residual=False):
        super(Attn_head, self).__init__()
        self.in_channel = in_channel
        self.out_sz = out_sz
        self.bias_mat = bias_mat
        self.in_drop = in_drop
        self.coef_drop = coef_drop
        self.activation = activation
        self.residual = residual
        # 1x1 convolutions play the role of tf.layers.conv1d in the original code
        self.conv1 = nn.Conv1d(self.in_channel, self.out_sz, 1)
        self.conv2_1 = nn.Conv1d(self.out_sz, 1, 1)
        self.conv2_2 = nn.Conv1d(self.out_sz, 1, 1)
        self.res_conv = nn.Conv1d(self.in_channel, self.out_sz, 1)
        self.leakyrelu = nn.LeakyReLU()
        self.softmax = nn.Softmax(dim=1)
        # in PyTorch, Dropout's parameter p is the probability that a unit is zeroed out
        self.in_dropout = nn.Dropout()
        self.coef_dropout = nn.Dropout()

    def forward(self, x):
        seq = x
        if self.in_drop != 0.0:
            seq = self.in_dropout(x)
        seq_fts = self.conv1(seq)
        f_1 = self.conv2_1(seq_fts)
        f_2 = self.conv2_2(seq_fts)
        logits = f_1 + torch.transpose(f_2, 2, 1)
        logits = self.leakyrelu(logits)
        coefs = self.softmax(logits + self.bias_mat)
        if self.coef_drop != 0.0:
            coefs = self.coef_dropout(coefs)
        if self.in_drop != 0.0:
            seq_fts = self.in_dropout(seq_fts)
        ret = torch.matmul(coefs, torch.transpose(seq_fts, 2, 1))
        ret = torch.transpose(ret, 2, 1)
        if self.residual:
            if seq.shape[1] != ret.shape[1]:
                ret = ret + self.res_conv(seq)
            else:
                ret = ret + seq
        return self.activation(ret)
The class must inherit from nn.Module; the relevant parameters and layers are initialized in __init__, and the forward computation is carried out in forward.
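As a quick sanity check of the layer above, here is a small usage sketch; the node count, channel sizes, and all-zero bias matrix are toy values chosen for illustration (nn.Conv1d expects inputs shaped (batch, channels, nodes)):

# Toy shape check for Attn_head; all sizes below are made up for illustration.
import torch
import torch.nn as nn

nb_nodes, in_channel, out_sz = 5, 16, 8
x = torch.rand(1, in_channel, nb_nodes)        # (batch, channels, nodes)
bias_mat = torch.zeros(1, nb_nodes, nb_nodes)  # 0 keeps an edge; a large negative value masks it

layer = Attn_head(in_channel=in_channel, out_sz=out_sz, bias_mat=bias_mat,
                  in_drop=0.6, coef_drop=0.6, activation=nn.ELU(), residual=True)
out = layer(x)
print(out.shape)                               # torch.Size([1, 8, 5])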
2.3 Building the model
With the attention layer in place, we can now build the model. The TensorFlow code:
def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
              bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False):
    attns = []
    for _ in range(n_heads[0]):
        attns.append(layers.attn_head(inputs, bias_mat=bias_mat,
                                      out_sz=hid_units[0], activation=activation,
                                      in_drop=ffd_drop, coef_drop=attn_drop, residual=False))
    h_1 = tf.concat(attns, axis=-1)
    for i in range(1, len(hid_units)):
        h_old = h_1
        attns = []
        for _ in range(n_heads[i]):
            attns.append(layers.attn_head(h_1, bias_mat=bias_mat,
                                          out_sz=hid_units[i], activation=activation,
                                          in_drop=ffd_drop, coef_drop=attn_drop, residual=residual))
        h_1 = tf.concat(attns, axis=-1)
    out = []
    for i in range(n_heads[-1]):
        out.append(layers.attn_head(h_1, bias_mat=bias_mat,
                                    out_sz=nb_classes, activation=lambda x: x,
                                    in_drop=ffd_drop, coef_drop=attn_drop, residual=False))
    logits = tf.add_n(out) / n_heads[-1]

    return logits
The rewritten PyTorch code:
import numpy as np
import torch
import torch.nn as nn
from layer import *


class GAT(nn.Module):
    def __init__(self,
                 nb_classes,
                 nb_nodes,
                 attn_drop,
                 ffd_drop,
                 bias_mat,
                 hid_units,
                 n_heads,
                 residual=False):
        super(GAT, self).__init__()
        self.nb_classes = nb_classes
        self.nb_nodes = nb_nodes
        self.attn_drop = attn_drop
        self.ffd_drop = ffd_drop
        self.bias_mat = bias_mat
        self.hid_units = hid_units
        self.n_heads = n_heads
        self.residual = residual
        # Cora input features have 1433 dimensions; the hidden representation concatenates
        # n_heads[0] heads of hid_units[0] units each, which must equal the 64 channels
        # hard-coded as attn2's input below.
        self.attn1 = Attn_head(in_channel=1433, out_sz=self.hid_units[0],
                               bias_mat=self.bias_mat, in_drop=self.ffd_drop,
                               coef_drop=self.attn_drop, activation=nn.ELU(),
                               residual=self.residual)
        self.attn2 = Attn_head(in_channel=64, out_sz=self.nb_classes,
                               bias_mat=self.bias_mat, in_drop=self.ffd_drop,
                               coef_drop=self.attn_drop, activation=nn.ELU(),
                               residual=self.residual)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        attns = []
        for _ in range(self.n_heads[0]):
            attns.append(self.attn1(x))
        h_1 = torch.cat(attns, dim=1)
        out = self.attn2(h_1)
        logits = torch.transpose(out.view(self.nb_classes, -1), 1, 0)
        logits = self.softmax(logits)
        return logits
Unlike the TensorFlow code, here we define only two attention layers. Also note that when we define the layers in __init__, the input and output dimensions must be known in advance and filled in; if the actual sizes seen in forward do not match the predefined dimensions, an error is raised.
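To make the dimension bookkeeping concrete, here is a sketch of instantiating the model for Cora-sized data (1433 input features, 7 classes, 2708 nodes). The hyper-parameter values below (8 hidden units, 8 + 1 heads, dropout of 0.6) are the usual GAT settings and are assumptions for illustration, not values stated above:

# Illustrative instantiation for Cora; hid_units[0] * n_heads[0] = 8 * 8 = 64
# must match the in_channel=64 hard-coded for attn2 in the class above.
import torch
import torch.nn as nn

nb_nodes, ft_size, nb_classes = 2708, 1433, 7
hid_units, n_heads = [8], [8, 1]

bias_mat = torch.zeros(1, nb_nodes, nb_nodes)   # all-zero mask just for the shape check
model = GAT(nb_classes=nb_classes, nb_nodes=nb_nodes, attn_drop=0.6, ffd_drop=0.6,
            bias_mat=bias_mat, hid_units=hid_units, n_heads=n_heads)

x = torch.rand(1, ft_size, nb_nodes)            # features laid out as (batch, channels, nodes)
logits = model(x)
print(logits.shape)                             # torch.Size([2708, 7])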
2.4 Training, validation, and testing
First, let's see how this is defined in TensorFlow:
with tf.Graph().as_default():
    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
        bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                             attn_drop, ffd_drop,
                             bias_mat=bias_in,
                             hid_units=hid_units, n_heads=n_heads,
                             residual=residual, activation=nonlinearity)
    log_resh = tf.reshape(logits, [-1, nb_classes])
    lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
    msk_resh = tf.reshape(msk_in, [-1])
    loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
    accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)
    train_op = model.training(loss, lr, l2_coef)
    saver = tf.train.Saver()

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

    vlss_mn = np.inf
    vacc_mx = 0.0
    curr_step = 0

    with tf.Session() as sess:
        sess.run(init_op)

        train_loss_avg = 0
        train_acc_avg = 0
        val_loss_avg = 0
        val_acc_avg = 0

        for epoch in range(nb_epochs):
            print("epoch: ", epoch)
            tr_step = 0
            tr_size = features.shape[0]

            while tr_step * batch_size < tr_size:
                _, loss_value_tr, acc_tr = sess.run([train_op, loss, accuracy],
                    feed_dict={
                        ftr_in: features[tr_step*batch_size:(tr_step+1)*batch_size],
                        bias_in: biases[tr_step*batch_size:(tr_step+1)*batch_size],
                        lbl_in: y_train[tr_step*batch_size:(tr_step+1)*batch_size],
                        msk_in: train_mask[tr_step*batch_size:(tr_step+1)*batch_size],
                        is_train: True,
                        attn_drop: 0.6, ffd_drop: 0.6})
                train_loss_avg += loss_value_tr
                train_acc_avg += acc_tr
                tr_step += 1

            vl_step = 0
            vl_size = features.shape[0]

            while vl_step * batch_size < vl_size:
                loss_value_vl, acc_vl = sess.run([loss, accuracy],
                    feed_dict={
                        ftr_in: features[vl_step*batch_size:(vl_step+1)*batch_size],
                        bias_in: biases[vl_step*batch_size:(vl_step+1)*batch_size],
                        lbl_in: y_val[vl_step*batch_size:(vl_step+1)*batch_size],
                        msk_in: val_mask[vl_step*batch_size:(vl_step+1)*batch_size],
                        is_train: False,
                        attn_drop: 0.0, ffd_drop: 0.0})
                val_loss_avg += loss_value_vl
                val_acc_avg += acc_vl
                vl_step += 1

            print('Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                  (train_loss_avg/tr_step, train_acc_avg/tr_step,
                   val_loss_avg/vl_step, val_acc_avg/vl_step))

            if val_acc_avg/vl_step >= vacc_mx or val_loss_avg/vl_step <= vlss_mn:
                if val_acc_avg/vl_step >= vacc_mx and val_loss_avg/vl_step <= vlss_mn:
                    vacc_early_model = val_acc_avg/vl_step
                    vlss_early_model = val_loss_avg/vl_step
                    saver.save(sess, checkpt_file)
                vacc_mx = np.max((val_acc_avg/vl_step, vacc_mx))
                vlss_mn = np.min((val_loss_avg/vl_step, vlss_mn))
                curr_step = 0
            else:
                curr_step += 1
                if curr_step == patience:
                    print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                    print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ', vacc_early_model)
                    break

            train_loss_avg = 0
            train_acc_avg = 0
            val_loss_avg = 0
            val_acc_avg = 0

        ts_size = features.shape[0]
        ts_step = 0
        ts_loss = 0.0
        ts_acc = 0.0

        while ts_step * batch_size < ts_size:
            loss_value_ts, acc_ts = sess.run([loss, accuracy],
                feed_dict={
                    ftr_in: features[ts_step*batch_size:(ts_step+1)*batch_size],
                    bias_in: biases[ts_step*batch_size:(ts_step+1)*batch_size],
                    lbl_in: y_test[ts_step*batch_size:(ts_step+1)*batch_size],
                    msk_in: test_mask[ts_step*batch_size:(ts_step+1)*batch_size],
                    is_train: False,
                    attn_drop: 0.0, ffd_drop: 0.0})
            ts_loss += loss_value_ts
            ts_acc += acc_ts
            ts_step += 1

        print('Test loss:', ts_loss/ts_step, '; Test accuracy:', ts_acc/ts_step)

        sess.close()
In short: build the graph, then run everything inside a Session.
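The PyTorch side of training is not shown above, so purely as a rough, illustrative counterpart, a training and validation loop for the converted model might look like the sketch below. It assumes the GAT module and the load_data dictionary from the earlier sections; the placeholder bias_mat, the optimizer settings (Adam, learning rate 0.005, weight decay 5e-4), and the epoch count are common GAT defaults chosen here for illustration, not taken from this post.

# Illustrative PyTorch training loop; assumes GAT and load_data defined above.
import numpy as np
import torch
import torch.nn as nn

data = load_data('cora')
features = torch.FloatTensor(np.array(data['features'].todense())).t().unsqueeze(0)  # (1, ft_size, nb_nodes)
nb_nodes = features.shape[2]
# In practice bias_mat would be built from data['adj'] (large negative values where there is no edge);
# an all-zero tensor is used here only so the sketch is self-contained.
bias_mat = torch.zeros(1, nb_nodes, nb_nodes)

train_mask = torch.BoolTensor(data['train_mask'])
val_mask = torch.BoolTensor(data['val_mask'])
train_labels = torch.LongTensor(data['train_my_labels'])
val_labels = torch.LongTensor(data['val_my_labels'])

model = GAT(nb_classes=7, nb_nodes=nb_nodes, attn_drop=0.6, ffd_drop=0.6,
            bias_mat=bias_mat, hid_units=[8], n_heads=[8, 1])
# Note: the GAT above already ends in a softmax, while CrossEntropyLoss applies
# log-softmax itself; for a cleaner loss you may prefer to return raw scores from forward.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

for epoch in range(200):
    model.train()
    optimizer.zero_grad()
    logits = model(features)                           # (nb_nodes, nb_classes)
    loss = criterion(logits[train_mask], train_labels) # only training nodes contribute
    loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        logits = model(features)
        val_loss = criterion(logits[val_mask], val_labels)
        val_acc = (logits[val_mask].argmax(dim=1) == val_labels).float().mean()
    print('epoch %d | train loss %.4f | val loss %.4f | val acc %.4f'
          % (epoch, loss.item(), val_loss.item(), val_acc.item()))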