import os.path as osp
import torch
import torch.nn.functional as F
from torch.nn import Linear
import torch_geometric.transforms as T
from torch_geometric.datasets import MovieLens
from torch_geometric.explain import CaptumExplainer, Explainer
from torch_geometric.nn import SAGEConv, to_hetero
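# This example trains a heterogeneous link-regression model on MovieLens and
# explains its rating predictions with Captum's IntegratedGradients.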
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/MovieLens')
dataset = MovieLens(path, model_name='all-MiniLM-L6-v2')
data = dataset[0].to(device)
# Add user node features for message passing:
data['user'].x = torch.eye(data['user'].num_nodes, device=device)
del data['user'].num_nodes
# Add a reverse ('movie', 'rev_rates', 'user') relation for message passing:
data = T.ToUndirected()(data)
data['user', 'movie'].edge_label = data['user',
                                        'movie'].edge_label.to(torch.float)
del data['movie', 'rev_rates', 'user'].edge_label # Remove "reverse" label.
# Perform a link-level split into training, validation, and test edges:
data, _, _ = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[('user', 'rates', 'movie')],
    rev_edge_types=[('movie', 'rev_rates', 'user')],
)(data)
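# GraphSAGE encoder. `in_channels=(-1, -1)` lets each convolution lazily infer
# its input dimensions; `to_hetero` (below) replicates it for every edge type.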
class GNNEncoder(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x
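# Decoder: score a (user, movie) pair by concatenating both node embeddings
# and passing them through a two-layer MLP that outputs a single rating.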
class EdgeDecoder(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['user'][row], z_dict['movie'][col]], dim=-1)
        z = self.lin1(z).relu()
        z = self.lin2(z)
        return z.view(-1)
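# Full model: heterogeneous GraphSAGE encoder followed by the edge decoder.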
class Model(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.encoder = GNNEncoder(hidden_channels, hidden_channels)
        self.encoder = to_hetero(self.encoder, data.metadata(), aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)
model = Model(hidden_channels=32).to(device)

# Due to lazy initialization, run one model step so that parameter shapes are
# known before the optimizer is created:
with torch.no_grad():
    model(data.x_dict, data.edge_index_dict,
          data['user', 'movie'].edge_label_index)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
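# Train on the observed ratings with a mean squared error objective: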
for epoch in range(1, 10):
    model.train()
    optimizer.zero_grad()
    pred = model(
        data.x_dict,
        data.edge_index_dict,
        data['user', 'movie'].edge_label_index,
    )
    loss = F.mse_loss(pred, data['user', 'movie'].edge_label)
    loss.backward()
    optimizer.step()
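# Attribute edge-level predictions to node features and graph edges via
# Captum's IntegratedGradients, keeping only the top-200 mask values: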
explainer = Explainer(
    model=model,
    algorithm=CaptumExplainer('IntegratedGradients'),
    explanation_type='model',
    model_config=dict(
        mode='regression',
        task_level='edge',
        return_type='raw',
    ),
    node_mask_type='attributes',
    edge_mask_type='object',
    threshold_config=dict(
        threshold_type='topk',
        value=200,
    ),
)
index = torch.tensor([2, 10]) # Explain edge labels with index 2 and 10.
explanation = explainer(
    data.x_dict,
    data.edge_index_dict,
    index=index,
    edge_label_index=data['user', 'movie'].edge_label_index,
)
print(f'Generated explanations in {explanation.available_explanations}')
path = 'feature_importance.png'
explanation.visualize_feature_importance(path, top_k=10)
print(f"Feature importance plot has been saved to '{path}'")