# Source listing: 735 lines, 31 KiB, Python
import os
|
|
import uuid
|
|
import argparse
|
|
import logging
|
|
import logging.config
|
|
import pandas as pd
|
|
import sys
|
|
|
|
import torch
|
|
import numpy as np
|
|
import time
|
|
|
|
from collections import defaultdict as ddict
|
|
from pprint import pprint
|
|
|
|
from ordered_set import OrderedSet
|
|
from torch.utils.data import DataLoader
|
|
|
|
from data_loader import TrainDataset, TestDataset
|
|
from utils import get_logger, get_combined_results, set_gpu, prepare_env, set_seed
|
|
|
|
from models import ComplEx, ConvE, HypER, InteractE, FouriER, TuckER
|
|
import traceback
|
|
|
|
|
|
class Main(object):
    """
    Runner that loads a knowledge-graph dataset, builds the FouriER model and
    its optimizer, and drives training, checkpointing and evaluation.
    """

    def __init__(self, params, logger):
        """
        Constructor of the runner class

        Parameters
        ----------
        params: Namespace holding the hyper-parameters of the model
        logger: Logger used for all progress messages

        Returns
        -------
        Creates computational graph and optimizer
        """
        self.p = params
        self.logger = logger

        self.logger.info(vars(self.p))

        if self.p.gpu != '-1' and torch.cuda.is_available():
            self.device = torch.device('cuda')
            # Kept from the original code: re-applies the current CUDA RNG state.
            torch.cuda.set_rng_state(torch.cuda.get_rng_state())
            torch.backends.cudnn.deterministic = True
        else:
            self.device = torch.device('cpu')

        self.load_data()
        self.model = self.add_model()
        self.optimizer = self.add_optimizer(self.model.parameters())

    @staticmethod
    def _clean_fields(line):
        """Split a tab-separated line into lower-cased fields, dropping non-breaking spaces."""
        return [field.lower() for field in line.replace('\xa0', '').strip().split('\t')]

    def _read_id_map(self, filename):
        """Read an ``<id>\\t<name>`` dictionary file of the dataset into a name -> id dict."""
        mapping = {}
        with open('./data/{}/{}'.format(self.p.dataset, filename)) as handle:
            for line in handle:
                if not line.strip():
                    continue  # tolerate blank lines (e.g. a trailing newline)
                idx, name = self._clean_fields(line)
                mapping[name] = int(idx)
        return mapping

    def _read_triples(self, split):
        """Read ``<split>.txt`` and return its triples as (sub_id, rel_id, obj_id) tuples."""
        triples = []
        with open('./data/{}/{}.txt'.format(self.p.dataset, split)) as handle:
            for line in handle:
                if not line.strip():
                    continue
                sub, rel, obj, *_ = self._clean_fields(line)
                triples.append((self.ent2id[sub], self.rel2id[rel], self.ent2id[obj]))
        return triples

    def load_data(self):
        """
        Reading in raw triples and converts it into a standard format.

        Parameters
        ----------
        self.p.dataset:         Takes in the name of the dataset (FB15k-237, WN18RR, YAGO3-10)

        Returns
        -------
        self.ent2id:            Entity to unique identifier mapping (read from entities.dict)
        self.id2ent:            Inverse mapping of self.ent2id
        self.rel2id:            Relation to unique identifier mapping (read from relations.dict),
                                extended with '<rel>_reverse' entries
        self.id2rel:            Inverse mapping of self.rel2id
        self.p.num_ent:         Number of entities in the Knowledge graph
        self.p.num_rel:         Number of relations in the Knowledge graph (without reverse relations)
        self.p.embed_dim:       Embedding dimension used (k_w * k_h when not given)
        self.data['train']:     Stores the triples corresponding to training dataset
        self.data['valid']:     Stores the triples corresponding to validation dataset
        self.data['test']:      Stores the triples corresponding to test dataset
        self.sr2o:              (sub, rel) -> objects seen in the training split
        self.sr2o_all:          (sub, rel) -> objects seen in any split
        self.triples:           Per-loader lists of {'triple', 'label', ...} records
        self.data_iter:         The dataloader for different data splits
        self.chequer_perm:      Stores the Chequer reshaping arrangement
        """
        self.ent2id = self._read_id_map('entities.dict')
        self.rel2id = self._read_id_map('relations.dict')

        # The reverse of relation r must live at id(r) + num_rel, because that is
        # the offset used below for every inverse triple. Deriving it from the
        # dictionary file (instead of the order relations happen to appear in the
        # triple files) keeps names and ids consistent.
        num_rel = len(self.rel2id)
        self.rel2id.update({rel + '_reverse': idx + num_rel
                            for rel, idx in list(self.rel2id.items())})

        self.id2ent = {idx: ent for ent, idx in self.ent2id.items()}
        self.id2rel = {idx: rel for rel, idx in self.rel2id.items()}

        self.p.num_ent = len(self.ent2id)
        self.p.num_rel = num_rel
        if self.p.embed_dim is None:
            self.p.embed_dim = self.p.k_w * self.p.k_h

        self.data = {split: self._read_triples(split)
                     for split in ['train', 'test', 'valid']}

        # Known objects per (subject, relation) query; training split only first.
        sr2o = ddict(set)
        for sub, rel, obj in self.data['train']:
            sr2o[(sub, rel)].add(obj)
            sr2o[(obj, rel + num_rel)].add(sub)
        self.sr2o = {key: list(objs) for key, objs in sr2o.items()}

        # Then extended with test/valid so evaluation can filter every known answer.
        for split in ['test', 'valid']:
            for sub, rel, obj in self.data[split]:
                sr2o[(sub, rel)].add(obj)
                sr2o[(obj, rel + num_rel)].add(sub)
        self.sr2o_all = {key: list(objs) for key, objs in sr2o.items()}

        self.triples = ddict(list)

        if self.p.train_strategy == 'one_to_n':
            # One training record per distinct (sub, rel) query.
            for (sub, rel), objs in self.sr2o.items():
                self.triples['train'].append(
                    {'triple': (sub, rel, -1), 'label': objs, 'sub_samp': 1})
        else:
            for sub, rel, obj in self.data['train']:
                rel_inv = rel + num_rel
                freq = len(self.sr2o[(sub, rel)]) + len(self.sr2o[(obj, rel_inv)])
                sub_samp = np.sqrt(1 / freq)

                self.triples['train'].append(
                    {'triple': (sub, rel, obj), 'label': self.sr2o[(sub, rel)], 'sub_samp': sub_samp})
                self.triples['train'].append(
                    {'triple': (obj, rel_inv, sub), 'label': self.sr2o[(obj, rel_inv)], 'sub_samp': sub_samp})

        for split in ['test', 'valid']:
            for sub, rel, obj in self.data[split]:
                rel_inv = rel + num_rel
                self.triples['{}_{}'.format(split, 'tail')].append(
                    {'triple': (sub, rel, obj), 'label': self.sr2o_all[(sub, rel)]})
                self.triples['{}_{}'.format(split, 'head')].append(
                    {'triple': (obj, rel_inv, sub), 'label': self.sr2o_all[(obj, rel_inv)]})

        self.triples = dict(self.triples)

        def get_data_loader(dataset_class, split, batch_size, shuffle=True):
            return DataLoader(
                dataset_class(self.triples[split], self.p),
                batch_size=batch_size,
                shuffle=shuffle,
                num_workers=max(0, self.p.num_workers),
                collate_fn=dataset_class.collate_fn
            )

        self.data_iter = {
            'train': get_data_loader(TrainDataset, 'train', self.p.batch_size),
            'valid_head': get_data_loader(TestDataset, 'valid_head', self.p.batch_size),
            'valid_tail': get_data_loader(TestDataset, 'valid_tail', self.p.batch_size),
            'test_head': get_data_loader(TestDataset, 'test_head', self.p.batch_size),
            'test_tail': get_data_loader(TestDataset, 'test_tail', self.p.batch_size),
        }

        self.chequer_perm = self.get_chequer_perm()

    def get_chequer_perm(self):
        """
        Function to generate the chequer permutation required for InteractE model

        Parameters
        ----------
        Reads self.p.embed_dim, self.p.perm, self.p.k_h and self.p.k_w.

        Returns
        -------
        LongTensor of shape (perm, 2 * k_h * k_w) on self.device. Entity indices
        are in [0, embed_dim), relation indices are offset by embed_dim.
        """
        embed_dim = self.p.embed_dim
        # Draw all entity permutations first, then all relation permutations,
        # so the sequence of RNG calls stays the same for a given seed.
        ent_perm = np.array([np.random.permutation(embed_dim)
                             for _ in range(self.p.perm)], dtype=np.int32)
        rel_perm = np.array([np.random.permutation(embed_dim)
                             for _ in range(self.p.perm)], dtype=np.int32)

        comb_idx = []
        for k in range(self.p.perm):
            row = []
            cursor = 0  # entity and relation cursors always advance together
            for i in range(self.p.k_h):
                # Entity goes first when permutation index and row index have
                # the same parity; otherwise the relation goes first.
                ent_first = (k + i) % 2 == 0
                for _ in range(self.p.k_w):
                    ent = ent_perm[k, cursor]
                    rel = rel_perm[k, cursor] + embed_dim
                    row.extend((ent, rel) if ent_first else (rel, ent))
                    cursor += 1
            comb_idx.append(row)

        chequer_perm = torch.from_numpy(
            np.asarray(comb_idx, dtype=np.int64)).to(self.device)
        return chequer_perm

    def add_model(self):
        """
        Creates the computational graph

        Parameters
        ----------

        Returns
        -------
        The FouriER model built from self.p and moved to self.device
        """
        model = FouriER(self.p)
        model.to(self.device)
        return model

    def add_optimizer(self, parameters):
        """
        Creates an optimizer for training the parameters

        Parameters
        ----------
        parameters:         The parameters of the model

        Returns
        -------
        Adam when self.p.opt == 'adam', plain SGD for any other value; both use
        self.p.lr as learning rate and self.p.l2 as weight decay
        """
        if self.p.opt == 'adam':
            return torch.optim.Adam(parameters, lr=self.p.lr, weight_decay=self.p.l2)
        return torch.optim.SGD(parameters, lr=self.p.lr, weight_decay=self.p.l2)

    def read_batch(self, batch, split):
        """
        Function to read a batch of data and move the tensors in batch to CPU/GPU

        Parameters
        ----------
        batch:          the batch to process
        split: (string) If split == 'train', 'valid' or 'test' split

        Returns
        -------
        For 'train': (sub, rel, obj, label, neg_ent, sub_samp); neg_ent and
        sub_samp are None unless the training strategy is 'one_to_x'.
        Otherwise: (sub, rel, obj, label).
        """
        tensors = [item.to(self.device) for item in batch]

        if split != 'train':
            triple, label = tensors
            return triple[:, 0], triple[:, 1], triple[:, 2], label

        if self.p.train_strategy == 'one_to_x':
            triple, label, neg_ent, sub_samp = tensors
            return triple[:, 0], triple[:, 1], triple[:, 2], label, neg_ent, sub_samp

        triple, label = tensors
        return triple[:, 0], triple[:, 1], triple[:, 2], label, None, None

    def save_model(self, save_path):
        """
        Function to save a model. It saves the model parameters, best validation scores,
        best epoch corresponding to best validation, state of the optimizer and all arguments for the run.

        Parameters
        ----------
        save_path: path where the model is saved

        Returns
        -------
        """
        state = {
            'state_dict': self.model.state_dict(),
            'best_val': self.best_val,
            'best_epoch': self.best_epoch,
            'optimizer': self.optimizer.state_dict(),
            'args': vars(self.p)
        }
        # Make sure the target directory exists so the first save cannot fail.
        target_dir = os.path.dirname(save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        torch.save(state, save_path)

    def load_model(self, load_path):
        """
        Function to load a saved model

        Parameters
        ----------
        load_path: path to the saved model

        Returns
        -------
        Restores model weights, optimizer state, best validation results and
        best epoch from the checkpoint
        """
        # map_location lets a checkpoint written on GPU be restored on CPU.
        state = torch.load(load_path, map_location=self.device)
        self.best_val = state['best_val']
        self.best_val_mrr = self.best_val['mrr']
        self.best_epoch = state.get('best_epoch', 0)

        self.model.load_state_dict(state['state_dict'])
        self.optimizer.load_state_dict(state['optimizer'])

    def evaluate(self, split, epoch=0):
        """
        Function to evaluate the model on validation or test set

        Parameters
        ----------
        split: (string) If split == 'valid' then evaluate on the validation set, else the test set
        epoch: (int) Current epoch count

        Returns
        -------
        results:            The evaluation results containing the following:
            results['mr']:          Average of ranks_left and ranks_right
            results['mrr']:         Mean Reciprocal Rank
            results['hits@k']:      Probability of getting the correct prediction in top-k ranks based on predicted score
        """
        left_results = self.predict(split=split, mode='tail_batch')
        right_results = self.predict(split=split, mode='head_batch')
        results = get_combined_results(left_results, right_results)

        metrics = [('MRR', 'mrr'), ('MR', 'mr'), ('Hit-1', 'hits@1'),
                   ('Hit-3', 'hits@3'), ('Hit-10', 'hits@10')]
        lines = ['\t{}: Tail : {:.5}, Head : {:.5}, Avg : {:.5}'.format(
            label, results['left_' + key], results['right_' + key], results[key])
            for label, key in metrics]

        # The full table is logged every 10th epoch and for the test split;
        # other evaluations only log the MRR line.
        if (epoch + 1) % 10 == 0 or split == 'test':
            summary = '\n' + '\n'.join(lines)
        else:
            summary = '\n' + lines[0] + '\n'
        self.logger.info(
            '[Evaluating Epoch {} {}]: {}'.format(epoch, split, summary))

        return results

    def predict(self, split='valid', mode='tail_batch'):
        """
        Function to run model evaluation for a given mode

        Parameters
        ----------
        split: (string)     If split == 'valid' then evaluate on the validation set, else the test set
        mode: (string):     Can be 'head_batch' or 'tail_batch'

        Returns
        -------
        results:            Running sums over all evaluated triples:
            results['count']:       Number of triples evaluated
            results['mr']:          Sum of filtered ranks
            results['mrr']:         Sum of reciprocal filtered ranks
            results['hits@k']:      Number of triples ranked within the top k (k = 1..10)

        Also writes '<self.p.name>_result.csv' with per-triple ranks and scores.
        NOTE(review): every call writes the same file name, so only the most
        recent split/mode survives on disk.
        """
        self.model.eval()

        results = {}
        # Insertion order is the CSV column order.
        columns = {name: [] for name in
                   ('sub', 'rel', 'obj', 'rank', 'score', 'pred', 'pred_score')}
        loader = self.data_iter['{}_{}'.format(split, mode.split('_')[0])]

        with torch.no_grad():
            for step, batch in enumerate(loader):
                sub, rel, obj, label = self.read_batch(batch, split)
                pred = self.model.forward(sub, rel, None, 'one_to_n')
                b_range = torch.arange(pred.size()[0], device=self.device)

                # Filtered setting: blank out every other known answer, then put
                # the target's own score back.
                # NOTE(review): masking with 0 presumes scores are non-negative
                # (e.g. sigmoid outputs) - confirm against the model.
                target_pred = pred[b_range, obj]
                pred = torch.where(label.bool(), torch.zeros_like(pred), pred)
                pred[b_range, obj] = target_pred

                # One descending sort serves both the top prediction and the rank.
                order = torch.argsort(pred, dim=1, descending=True)
                highest = order[:, 0]
                highest_score = pred[b_range, highest]
                ranks = 1 + torch.argsort(order, dim=1, descending=False)[b_range, obj]

                columns['sub'].extend(sub.cpu().numpy())
                columns['obj'].extend(obj.cpu().numpy())
                columns['rel'].extend(rel.cpu().numpy())
                columns['score'].extend(target_pred.cpu().numpy())
                columns['rank'].extend(ranks.cpu().numpy())
                columns['pred'].extend(highest.cpu().numpy())
                columns['pred_score'].extend(highest_score.cpu().numpy())

                ranks = ranks.float()
                results['count'] = torch.numel(ranks) + results.get('count', 0.0)
                results['mr'] = torch.sum(ranks).item() + results.get('mr', 0.0)
                results['mrr'] = torch.sum(1.0 / ranks).item() + results.get('mrr', 0.0)
                for k in range(1, 11):
                    key = 'hits@{}'.format(k)
                    results[key] = torch.numel(ranks[ranks <= k]) + results.get(key, 0.0)

                if step % 100 == 0:
                    self.logger.info('[{}, {} Step {}]\t{}'.format(
                        split.title(), mode.title(), step, self.p.name))

        df = pd.DataFrame(columns)
        df.to_csv(f"{self.p.name}_result.csv", header=True, index=False)
        return results

    def run_epoch(self, epoch):
        """
        Function to run one epoch of training

        Parameters
        ----------
        epoch: current epoch count

        Returns
        -------
        loss: The mean training loss over all batches of the epoch
        """
        self.model.train()

        losses = []
        for step, batch in enumerate(self.data_iter['train']):
            self.optimizer.zero_grad()

            sub, rel, obj, label, neg_ent, sub_samp = self.read_batch(batch, 'train')

            pred = self.model.forward(sub, rel, neg_ent, self.p.train_strategy)
            loss = self.model.loss(pred, label, sub_samp)

            loss.backward()
            self.optimizer.step()
            losses.append(loss.item())

            if step % 100 == 0:
                self.logger.info('[E:{}| {}]: Train Loss:{:.5}, Val MRR:{:.5}, \t{}'.format(
                    epoch, step, np.mean(losses), self.best_val_mrr, self.p.name))

        loss = np.mean(losses)
        self.logger.info(
            '[Epoch:{}]: Training Loss:{:.4}\n'.format(epoch, loss))
        return loss

    def fit(self):
        """
        Function to run training and evaluation of model

        Parameters
        ----------

        Returns
        -------
        Trains for self.p.max_epochs epochs, checkpoints whenever the validation
        MRR improves, then evaluates the best checkpoint on the test split
        """
        self.best_val_mrr, self.best_val, self.best_epoch = 0., {}, 0
        save_path = os.path.join('./torch_saved', self.p.name)

        if self.p.restore:
            self.load_model(save_path)
            self.logger.info('Successfully Loaded previous model')

        for epoch in range(self.p.max_epochs):
            train_loss = self.run_epoch(epoch)
            val_results = self.evaluate('valid', epoch)

            if val_results['mrr'] > self.best_val_mrr:
                self.best_val = val_results
                self.best_val_mrr = val_results['mrr']
                self.best_epoch = epoch
                self.save_model(save_path)
            self.logger.info('[Epoch {}]: Training Loss: {:.5}, Valid MRR: {:.5}, \n\n\n'.format(
                epoch, train_loss, self.best_val_mrr))

        # Restoring model corresponding to the best validation performance and evaluation on test data
        if os.path.exists(save_path):
            self.logger.info('Loading best model, evaluating on test data')
            self.load_model(save_path)
        else:
            # No epoch ever improved on the initial MRR, so nothing was saved.
            self.logger.info('No saved model found, evaluating current weights on test data')
        self.evaluate('test')
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse hyper-parameters, prepare the environment,
    # optionally run a learning-rate grid search, then either evaluate a saved
    # model (--test_only) or train one.
    # ArgumentDefaultsHelpFormatter appends the real default to every non-empty
    # help text, so help strings below do not repeat defaults that had drifted
    # from the actual values.
    parser = argparse.ArgumentParser(
        description="Parser For Arguments", formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Dataset and Experiment name
    parser.add_argument('--data', dest="dataset", default='FB15k-237',
                        help='Dataset to use for the experiment')
    parser.add_argument("--name", default='testrun_' + str(uuid.uuid4())[:8],
                        help='Name of the experiment')

    # Training parameters
    parser.add_argument("--gpu", type=str, default='-1',
                        help='GPU to use, set -1 for CPU')
    parser.add_argument("--train_strategy", type=str, default='one_to_n',
                        help='Training strategy to use')
    parser.add_argument("--opt", type=str, default='adam',
                        help='Optimizer to use for training')
    parser.add_argument('--neg_num', dest="neg_num", default=1000, type=int,
                        help='Number of negative samples to use for loss calculation')
    parser.add_argument('--batch', dest="batch_size", default=128, type=int,
                        help='Batch size')
    parser.add_argument("--l2", type=float, default=0.0,
                        help='L2 regularization')
    parser.add_argument("--lr", type=float, default=0.0001,
                        help='Learning Rate')
    parser.add_argument("--epoch", dest='max_epochs', default=500, type=int,
                        help='Maximum number of epochs. Default: 500')
    parser.add_argument("--num_workers", type=int, default=0,
                        help='Maximum number of workers used in DataLoader.')
    parser.add_argument('--seed', dest="seed", default=42, type=int,
                        help='Seed to reproduce results. Default: 42')
    parser.add_argument('--restore', dest="restore", action='store_true',
                        help='Restore from the previously saved model')

    # Model parameters
    parser.add_argument("--lbl_smooth", dest='lbl_smooth', default=0.1, type=float,
                        help='Label smoothing for true labels')
    parser.add_argument("--embed_dim", type=int, default=400,
                        help='Embedding dimension for entity and relation; k_h * k_w is used only when this is unset (None)')

    # Specific setting for embedding vectors: entity embedding vector and relation embedding vector
    parser.add_argument('--ent_vec_dim', type=int, default=400,
                        help="Embedding dimension of entity.")
    parser.add_argument('--rel_vec_dim', type=int, default=400,
                        help="Embedding dimension of relation.")

    parser.add_argument('--bias', dest="bias", action='store_true',
                        help='Whether to use bias in the model.')
    parser.add_argument('--form', type=str, default='plain',
                        help='The reshaping form to use.')

    # Reshape matrix parameters for InteractE
    parser.add_argument('--k_w', dest="k_w", default=10, type=int,
                        help='Width of the reshaped matrix. Default: 10')
    parser.add_argument('--k_h', dest="k_h", default=20, type=int,
                        help='Height of the reshaped matrix. Default: 20')

    parser.add_argument('--num_filt', dest="num_filt", default=96, type=int,
                        help='Number of filters in convolution. Default: 96. Test: 32, 64, 128')

    parser.add_argument('--ker_sz', dest="ker_sz", default=9, type=int,
                        help='Kernel size to use. Default: 9. Test: 3, 5, 7, 9')
    parser.add_argument('--perm', dest="perm", default=1, type=int,
                        help='Number of Feature rearrangement to use. Default: 1, 2, 3, 4, 5')

    # Configuration for dropout technique
    parser.add_argument('--hid_drop', dest="hid_drop", default=0.5, type=float,
                        help='Dropout for Hidden layer. Default: 0.5. Test: 0.2, 0.3, 0.4, 0.5')
    parser.add_argument('--feat_drop', dest="feat_drop", default=0.2, type=float,
                        help='Dropout for Feature. Test: 0.2, 0.3, 0.4, 0.5')
    parser.add_argument('--inp_drop', dest="inp_drop", default=0.2, type=float,
                        help='Dropout for Input layer. Test: 0.2, 0.3, 0.4, 0.5')
    parser.add_argument('--drop_path', dest="drop_path", default=0.0, type=float,
                        help='Path dropout. Test: 0.2, 0.3, 0.4, 0.5')
    parser.add_argument('--drop', dest="drop", default=0.0, type=float,
                        help='Inner drop. Test: 0.2, 0.3, 0.4, 0.5')

    # Configuration for in/output channels for ConvE, HypER, HypE
    parser.add_argument('--in_channels', dest="in_channels", default=1, type=int,
                        help='Input channels. Default: 1')
    parser.add_argument('--out_channels', dest="out_channels", default=32, type=int,
                        help='Output channels. Test: 32, 64, 128. Can be the same with num_filt hyperparameter.')
    parser.add_argument('--filt_h', type=int, default=1,
                        help='Height of filter. This configuration for HypER model. Default: 1. Choice: 1, 2, 3, 5, 7, 9')
    parser.add_argument('--filt_w', type=int, default=9,
                        help='Width of filter. This configuration for HypER model. Default: 9. If filt_h is 1, then filt_w: 1, 2, 3, 5, 7, 9, 11, 12, 13, 15')

    # Configuration for mixer layer
    parser.add_argument('--image_h', dest="image_h", default=128, type=int, help='')
    parser.add_argument('--image_w', dest="image_w", default=128, type=int, help='')
    parser.add_argument('--patch_size', dest="patch_size", default=8, type=int, help='')
    parser.add_argument('--mixer_dim', dest="mixer_dim", default=256, type=int, help='')
    parser.add_argument('--expansion_factor', dest="expansion_factor", default=4, type=int, help='')
    parser.add_argument('--expansion_factor_token', dest="expansion_factor_token",
                        default=0.5, type=float, help='')
    parser.add_argument('--mixer_depth', dest="mixer_depth", default=16, type=int, help='')
    parser.add_argument('--mixer_dropout', dest="mixer_dropout", default=0.2, type=float, help='')

    # Logging parameters
    parser.add_argument('--logdir', dest="log_dir", default='./log/',
                        help='Log directory')
    parser.add_argument('--config', dest="config_dir", default='./config/',
                        help='Config directory')

    parser.add_argument('--test_only', action='store_true', default=False)
    parser.add_argument('--grid_search', action='store_true', default=False)

    args = parser.parse_args()

    prepare_env()

    set_gpu(args.gpu)
    set_seed(args.seed)

    # Created before any Main(...) call: the constructor requires a logger, and
    # the grid-search branch previously built Main without one (TypeError).
    logger = get_logger(args.name, args.log_dir, args.config_dir)

    if args.grid_search:
        # Optional dependencies, imported only when the grid search is requested.
        from sklearn.model_selection import GridSearchCV
        from skorch import NeuralNet

        model = Main(args, logger)

        estimator = NeuralNet(
            module=FouriER(model.p),
            criterion=torch.nn.BCELoss,
            optimizer=torch.optim.Adam,
            max_epochs=100,
            batch_size=128,
            verbose=False
        )

        paramsGrid = {
            'optimizer__lr': [0.0003, 0.001],
            # 'optimizer__weight_decay': [1e-4, 1e-5, 1e-6],
            # 'module__hid_drop': [0.2, 0.5, 0.7],
            # 'module__embed_dim': [300, 400, 500],
        }

        # NOTE(review): `scoring` is given a loss class here, not a scikit-learn
        # scorer name or scorer callable - confirm this is what is intended.
        grid = GridSearchCV(estimator=estimator, param_grid=paramsGrid,
                            n_jobs=-1, cv=2, scoring=torch.nn.BCELoss)

        # Search on a random 15% sample of the training records.
        data = np.array(model.triples['train'])
        data = data[np.random.choice(np.arange(len(data)), size=int(len(data) * 0.15), replace=False)]

        # batch_size == len(data): the loader yields the whole sample as one batch.
        loader = DataLoader(
            TrainDataset(data, model.p),
            batch_size=len(data),
            shuffle=True,
            num_workers=max(0, model.p.num_workers),
            collate_fn=TrainDataset.collate_fn
        )
        for batch in loader:
            sub, rel, obj, label, neg_ent, sub_samp = model.read_batch(batch, 'train')

            if neg_ent is None:
                neg_ent = np.repeat(None, repeats=len(sub))
            else:
                neg_ent = neg_ent.cpu()

            inputs = [{'sub': s, 'rel': r, 'neg_ents': n}
                      for s, r, n in zip(sub, rel, neg_ent)]
            search = grid.fit(inputs, label)
            print("BEST SCORE: ", search.best_score_)
            print("BEST PARAMS: ", search.best_params_)

    if args.test_only:
        model = Main(args, logger)
        save_path = os.path.join('./torch_saved', args.name)
        model.load_model(save_path)
        model.evaluate('test')
    else:
        model = Main(args, logger)
        model.fit()
|