
I am using the OGB ogbg-molhiv dataset for graph classification. I imported the data and created the DataLoader following the OGB documentation; the dataset consists of 41127 graphs with 2 classes. Below is my PyTorch implementation of the neural network, along with my train and test functions. I have tried other network architectures, tried skipping the DataLoaders, and changed both the loss function and the optimizer, but the model does not seem to be learning: the evaluator provided by OGB keeps reporting a score of 0.5. Can anyone help me identify the issue?
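For completeness, my data loading follows the OGB documentation and looks roughly like this (the batch size of 32 is just what I happened to be experimenting with):

import torch
from torch_geometric.loader import DataLoader
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator

device = torch.device('cpu')  # running on CPU; batches are never moved to a GPU below

# ogbg-molhiv: 41127 molecular graphs, one binary label per graph.
dataset = PygGraphPropPredDataset(name='ogbg-molhiv')
split_idx = dataset.get_idx_split()  # scaffold split: 'train', 'valid', 'test'

train_loader = DataLoader(dataset[split_idx['train']], batch_size=32, shuffle=True)
test_loader = DataLoader(dataset[split_idx['test']], batch_size=32, shuffle=False)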

import torch.nn.functional as F
from torch.nn import Linear
import torch_geometric.nn as pyg_nn
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(100, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch, edge_attr):
        # 1. Obtain node embeddings 
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)

        # 2. Readout layer
        x = pyg_nn.global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        
        return x

model = GCN(hidden_channels=64)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
evaluator = Evaluator(name='ogbg-molhiv')
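
The atom_encoder and bond_encoder used in train() and test() below are OGB's molecule feature encoders; I set them up roughly like this, with emb_dim=100 to match the in_channels of conv1:

from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder

# Embed the categorical atom/bond features into 100-dimensional vectors,
# matching the input size of the first GCNConv layer.
atom_encoder = AtomEncoder(emb_dim=100)
bond_encoder = BondEncoder(emb_dim=100)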
def train():
    model.train()

    for data in train_loader:  # Iterate in batches over the training dataset.
        data.x = atom_encoder(data.x)
        data.edge_attr = bond_encoder(data.edge_attr)
        #data.batch = data.batch.to(device)
        out = model(data.x, data.edge_index, data.batch, data.edge_attr)  # Perform a single forward pass.
        loss = F.nll_loss(out, data.y.squeeze(1))  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        optimizer.zero_grad()  # Clear gradients.

def test(loader):
    model.eval()
    y_true = []
    y_pred = []
    for data in loader:  # Iterate in batches over the training/test dataset.
        data.x = atom_encoder(data.x)
        data.edge_attr = bond_encoder(data.edge_attr)
        out = model(data.x, data.edge_index, data.batch, data.edge_attr)
        y_pred_batch = out.argmax(dim=1)  # Use the class with the highest score.
        y_true.append(data.y.view(y_pred_batch.shape).detach().cpu())
        y_pred.append(y_pred_batch.detach().cpu())
    y_true = torch.cat(y_true, dim=0).numpy()
    y_pred = torch.cat(y_pred, dim=0).numpy()
    acc = evaluator.eval({'y_true': y_true.reshape(len(y_true), 1),
                          'y_pred': y_pred.reshape(len(y_true), 1)})
    return acc['rocauc']  # ROC-AUC as computed by the OGB evaluator.


for epoch in range(1, 171):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
# output over 170 epochs:
Epoch: 001, Train Acc: 0.5000, Test Acc: 0.5000
Epoch: 002, Train Acc: 0.5000, Test Acc: 0.5000
Epoch: 003, Train Acc: 0.5000, Test Acc: 0.5000
Epoch: 004, Train Acc: 0.5000, Test Acc: 0.5000
Epoch: 005, Train Acc: 0.5000, Test Acc: 0.5000
.
.
.