2 Commits

Author SHA1 Message Date
cngthnh 45cd8e1396 runnable v1 2023-01-14 10:40:58 +00:00
cngthnh fcfeae2bd3 first version of negative sampling 2023-01-08 14:25:31 +00:00
295 changed files with 1165332 additions and 1346197 deletions
+1
View File
@@ -7,3 +7,4 @@ dataset/FB15k-237/masked_*.txt
dataset/FB15k-237/cached_*.pkl
**/__pycache__/
**/.DS_Store
nohup.out
+38
View File
@@ -0,0 +1,38 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true,
"args": [
"--gpus", "1",
"--max_epochs=16",
"--num_workers=32",
"--model_name_or_path", "bert-base-uncased",
"--accumulate_grad_batches", "1",
"--model_class", "BertKGC",
"--batch_size", "64",
"--checkpoint", "/kg_374/Relphormer/pretrain/output/FB15k-237/epoch=15-step=19299-Eval/hits10=0.96.ckpt",
"--pretrain", "0",
"--bce", "0",
"--check_val_every_n_epoch", "1",
"--overwrite_cache",
"--data_dir", "dataset/FB15k-237",
"--eval_batch_size", "128",
"--max_seq_length", "128",
"--lr", "3e-5",
"--max_triplet", "64",
"--add_attn_bias", "True",
"--use_global_node", "True",
"--fast_dev_run", "True",
]
}
]
}
+21
View File
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 ZJUNLP
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+6
View File
@@ -0,0 +1,6 @@
from transformers import BartForConditionalGeneration, T5ForConditionalGeneration, GPT2LMHeadModel
from .model import *
@@ -0,0 +1,6 @@
{
"#examples": 3994,
"#kept_examples": 3994,
"#mappable_examples": 743,
"#multiple_answer_examples": 2
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,6 @@
{
"#examples": 3996,
"#kept_examples": 3996,
"#mappable_examples": 755,
"#multiple_answer_examples": 0
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,6 @@
{
"#examples": 20358,
"#kept_examples": 20358,
"#mappable_examples": 3713,
"#multiple_answer_examples": 4
}
File diff suppressed because it is too large Load Diff
+6
View File
@@ -0,0 +1,6 @@
{
"#examples": 3994,
"#kept_examples": 3994,
"#mappable_examples": 743,
"#multiple_answer_examples": 2
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+6
View File
@@ -0,0 +1,6 @@
{
"#examples": 3996,
"#kept_examples": 3996,
"#mappable_examples": 755,
"#multiple_answer_examples": 0
}
File diff suppressed because it is too large Load Diff
+6
View File
@@ -0,0 +1,6 @@
{
"#examples": 20358,
"#kept_examples": 20358,
"#mappable_examples": 3713,
"#multiple_answer_examples": 4
}
File diff suppressed because it is too large Load Diff
+403
View File
@@ -0,0 +1,403 @@
import json
import math
import argparse
from pathlib import Path
from transformers import BertTokenizer, BertForMaskedLM, AdamW, get_linear_schedule_with_warmup, AutoConfig
import torch
from torch import device, nn
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.utilities.seed import seed_everything
from transformers.tokenization_bert import BertTokenizerFast
from kge.model import KgeModel
from kge.util.io import load_checkpoint
from kge.util import sc
# from relphormer.lit_models import TransformerLitModel
from relphormer.models import BertKGC
# from relphormer.data import KGC
import os
# Default to GPU 4, but keep any CUDA_VISIBLE_DEVICES value the caller has
# already exported instead of silently overriding it.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', '4')
MODEL = 'bert-base-uncased'
# Module-level tokenizer: used by fbqa_collate to encode questions and entity
# names, and by CrossTrmFinetuner.validation_step to decode predictions.
tokenizer = BertTokenizer.from_pretrained(MODEL)
class FBQADataset(Dataset):
    """Dataset that keeps the fully parsed content of one JSON file in memory.

    The parsed object is indexed directly by ``__getitem__``, so the file is
    expected to contain a JSON array of examples.
    """

    def __init__(self, file_dir):
        """Load all examples from the JSON file at ``file_dir``."""
        # Use a context manager so the file handle is closed deterministically
        # rather than being left for the garbage collector.
        with Path(file_dir).open("rb") as fp:
            self.examples = json.load(fp)

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        # Samplers may hand over tensor indices; convert them to plain Python.
        if torch.is_tensor(idx):
            idx = idx.tolist()
        return self.examples[idx]
def fbqa_collate(samples):
    """Collate a list of raw QA examples into one batch.

    Each example is a dict with the keys ``RawQuestion``, ``AnswerEntity``,
    ``AnswerEntityID``, ``TopicEntityID``, ``TopicEntityName`` and
    ``RelationID``.  Relies on the module-level ``tokenizer``.

    Returns a 9-tuple:
        ``(question_encoding_dict, masked_labels, answers, answers_lengths,
        answer_ids, entities, relations, entity_mask, entity_span_index)``

    * ``masked_labels`` is -100 everywhere except at ``[MASK]`` positions,
      where it holds the answer token ids.
    * ``entity_mask`` marks the first occurrence of the topic-entity tokens in
      each question; ``entity_span_index`` holds its inclusive [start, end]
      token offsets (left at 0 when the entity is not found).
    """
    questions = []
    answers = []
    answer_ids = []
    entities = []
    entity_names = []
    relations = []
    for item in samples:
        # One "[MASK]" per element of AnswerEntity is appended to the question.
        # NOTE(review): AnswerEntity is presumably a list of answer token ids — confirm.
        q = item["RawQuestion"] + "[MASK]" * len(item["AnswerEntity"]) + "."
        questions.append(q)
        answers.append(item["AnswerEntity"])
        answer_ids.append(item["AnswerEntityID"])
        entities.append(item["TopicEntityID"])
        entity_names.append(item["TopicEntityName"])
        relations.append(item["RelationID"])

    questions = tokenizer(questions, return_tensors='pt', padding=True)
    entity_names = tokenizer(entity_names, add_special_tokens=False)
    answers, answers_lengths = sc.pad_seq_of_seq(answers)
    answers = torch.LongTensor(answers)
    answers_lengths = torch.LongTensor(answers_lengths)
    answer_ids = torch.LongTensor(answer_ids)

    input_ids = questions['input_ids']
    masked_labels = torch.ones_like(input_ids) * -100
    # The non-zero entries of `answers` are written into the [MASK] slots.
    # NOTE(review): this presumes pad_seq_of_seq pads with 0 — confirm.
    masked_labels[input_ids == tokenizer.mask_token_id] = answers[answers != 0]

    entity_mask = torch.zeros_like(input_ids).bool()
    entity_span_index = input_ids.new_zeros((len(input_ids), 2))
    for i, e_tokens in enumerate(entity_names['input_ids']):
        q_tokens = input_ids[i].tolist()
        span_len = len(e_tokens)
        # "+ 1" so the last valid start offset (len(q) - len(e)) is tried too;
        # without it a match ending on the final token could never be found.
        for s_index in range(len(q_tokens) - span_len + 1):
            if all(q_tokens[s_index + j] == e_token for j, e_token in enumerate(e_tokens)):
                entity_mask[i][s_index:s_index + span_len] = True
                entity_span_index[i][0] = s_index
                entity_span_index[i][1] = s_index + span_len - 1
                break  # only the first occurrence is marked

    entities = torch.LongTensor(entities)
    relations = torch.LongTensor(relations)
    return questions.data, masked_labels, answers, answers_lengths, answer_ids, entities, relations, entity_mask, entity_span_index
class SelfOutput(nn.Module):
    """Output sub-layer: linear projection, dropout, then residual LayerNorm.

    ``config`` must provide ``hidden_size``, ``layer_norm_eps`` and
    ``hidden_dropout_prob``.
    """

    def __init__(self, config):
        super().__init__()
        width = config.hidden_size
        self.dense = nn.Linear(width, width)
        self.LayerNorm = nn.LayerNorm(width, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        """Return ``LayerNorm(dropout(dense(hidden_states)) + input_tensor)``."""
        projected = self.dropout(self.dense(hidden_states))
        return self.LayerNorm(projected + input_tensor)
class CrossAttention(nn.Module):
    """Cross-attention layer followed by a residual output sub-layer.

    Combines ``CrossAttentionInternal`` (stored as ``self.self``) with
    ``SelfOutput`` (stored as ``self.output``) and initialises all of their
    weights through ``_init_weights``.
    """

    def __init__(self, config, ctx_hidden_size):
        super().__init__()
        self.self = CrossAttentionInternal(config, ctx_hidden_size)
        self.output = SelfOutput(config)
        self.config = config
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """ Initialize the weights of a single sub-module (called via ``apply``). """
        is_linear = isinstance(module, nn.Linear)
        if is_linear or isinstance(module, nn.Embedding):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        if is_linear and module.bias is not None:
            module.bias.data.zero_()

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
    ):
        """Run attention, then the residual output layer.

        Returns a tuple whose first element is the attended hidden states;
        any further elements produced by the inner attention module (the
        attention probabilities when requested) are appended unchanged.
        """
        attn_result = self.self(
            hidden_states,
            attention_mask,
            head_mask,
            encoder_hidden_states,
            encoder_attention_mask,
            output_attentions,
        )
        projected = self.output(attn_result[0], hidden_states)
        # add attentions if we output them
        return (projected,) + attn_result[1:]
class CrossAttentionInternal(nn.Module):
    """Multi-head cross-attention: queries from the hidden states, keys/values from a context.

    ``config`` must provide ``hidden_size``, ``num_attention_heads`` and
    ``attention_probs_dropout_prob``.  ``ctx_hidden_size`` is the feature size
    of the context (``encoder_hidden_states``) that keys and values are
    projected from.

    Raises:
        ValueError: if ``hidden_size`` is not a multiple of
            ``num_attention_heads`` (unless ``config`` has ``embedding_size``).
    """

    def __init__(self, config, ctx_hidden_size):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
            raise ValueError(
                "The hidden size (%d) is not a multiple of the number of attention "
                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = config.hidden_size // config.num_attention_heads
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = nn.Linear(config.hidden_size, self.all_head_size)
        self.key = nn.Linear(ctx_hidden_size, self.all_head_size)
        self.value = nn.Linear(ctx_hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def transpose_for_scores(self, x):
        """Reshape ``(..., seq, all_head_size)`` to ``(batch, heads, seq, head_size)``."""
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(*new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
    ):
        """Attend from ``hidden_states`` to ``encoder_hidden_states``.

        ``attention_mask`` is accepted for signature compatibility but is
        ignored: only ``encoder_attention_mask`` (an additive mask) is applied.

        Returns ``(context_layer,)`` or, when ``output_attentions`` is true,
        ``(context_layer, attention_probs)`` where ``attention_probs`` are the
        softmax probabilities before dropout and head masking.
        """
        mixed_query_layer = self.query(hidden_states)

        # Keys and values always come from the external context; the mask must
        # be such that the context's padding positions are not attended to.
        mixed_key_layer = self.key(encoder_hidden_states)
        mixed_value_layer = self.value(encoder_hidden_states)
        attention_mask = encoder_attention_mask

        query_layer = self.transpose_for_scores(mixed_query_layer)
        key_layer = self.transpose_for_scores(mixed_key_layer)
        value_layer = self.transpose_for_scores(mixed_value_layer)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        if attention_mask is not None:
            # Additive mask: large negative values remove positions from the softmax.
            attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.  Computed once and
        # reused for the optional output below (dropout is not in-place, so
        # this tensor keeps the un-dropped probabilities).
        raw_attention_probs = attention_scores.softmax(dim=-1)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(raw_attention_probs)

        # Mask heads if we want to
        if head_mask is not None:
            attention_probs = attention_probs * head_mask

        context_layer = torch.matmul(attention_probs, value_layer)

        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*new_context_layer_shape)

        outputs = (context_layer, raw_attention_probs) if output_attentions else (context_layer,)
        return outputs
class CrossTrmFinetuner(pl.LightningModule):
    """Lightning module that fine-tunes a masked-LM BERT on the QA datasets.

    ``hparams`` is a plain dict; the keys read here are ``lr``,
    ``weight_decay``, ``use_hitter``, ``batch_size``, ``train_dataset``,
    ``val_dataset``, ``test_dataset`` and ``max_steps`` (the latter is read in
    ``configure_optimizers`` and must be set by the caller before training).
    When ``use_hitter`` is true a pretrained HittER KGE model is loaded and
    one ``CrossAttention`` module per KG layer is created.
    """

    def __init__(self, hparams, bertmodel):
        super().__init__()
        self._hparams = hparams
        self.lr = hparams['lr']
        self.weight_decay = hparams['weight_decay']

        self.kg_dim = 320
        self.bert = bertmodel

        if self._hparams['use_hitter']:
            self.kg_layer_num = 10
            self.cross_attentions = nn.ModuleList([CrossAttention(self.bert.config, self.kg_dim)
                                                   for _ in range(self.kg_layer_num)])
            checkpoint = load_checkpoint('local/best/20200812-174221-trmeh-fb15k237-best/checkpoint_best.pt')
            self.hitter = KgeModel.create_from(checkpoint)

    def forward(self, batch):
        """Run the masked-LM on one collated batch and return the model output."""
        sent_input, masked_labels, batch_labels, label_lens, answer_ids, s, p, entity_mask, entity_span_index = batch

        if self._hparams['use_hitter']:
            # kg_masks: [bs, 1, 1, length]
            # kg_embeds: nlayer*[bs, length, dim]
            kg_embeds, kg_masks = self.hitter('get_hitter_repr', s, p)
            # Two leading None entries, then one (module, embeds, mask) triple
            # per cross-attention module.
            kg_attentions = [None] * 2 + [(self.cross_attentions[i], kg_embeds[(i + 2) // 2], kg_masks)
                                          for i in range(self.kg_layer_num)]
        else:
            kg_attentions = []

        out = self.bert(kg_attentions=kg_attentions,
                        output_attentions=True,
                        output_hidden_states=True,
                        return_dict=True,
                        labels=masked_labels,
                        **sent_input,
                        )

        return out

    def training_step(self, batch, batch_idx):
        output = self(batch)
        loss = output.loss
        self.log('train_loss', loss, on_epoch=True, prog_bar=True)
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        """Score one batch: a sample is a hit only if every answer token is the top-1 prediction."""
        batch_inputs, masked_labels, batch_labels, label_lens, answer_ids, s, p, entity_mask, _ = batch
        output = self(batch)
        input_tokens = batch_inputs["input_ids"].clone()

        # Keep only the logits at masked positions, flattened across the batch.
        logits = output.logits[masked_labels != -100]
        probs = logits.softmax(dim=-1)
        values, predictions = probs.topk(1)
        hits = []
        now_pos = 0  # offset of the current sample's first mask in `predictions`
        for sample_i, label_length in enumerate(label_lens.tolist()):
            failed = False
            for i in range(label_length):
                if (predictions[now_pos + i] == batch_labels[sample_i][i]).sum() != 1:
                    failed = True
                    break
            hits.append(0 if failed else 1)
            now_pos += label_length
        hits = torch.tensor(hits)
        # Substitute the predictions into the input so the text can be logged.
        input_tokens[input_tokens == tokenizer.mask_token_id] = predictions.flatten()
        pred_strings = [str(hits[i].item()) + ' ' + tokenizer.decode(input_tokens[i], skip_special_tokens=True)
                        for i in range(input_tokens.size(0))]

        return {'val_loss': output.loss,
                'val_acc': hits.float(),
                'pred_strings': pred_strings}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_val_acc = torch.cat([x['val_acc'] for x in outputs]).mean().to(avg_loss.device)

        if self.global_rank == 0:
            tensorboard = self.logger.experiment
            tensorboard.add_text('pred', '\n\n'.join(sum([x['pred_strings'] for x in outputs], [])), self.global_step)

        self.log('avg_loss', avg_loss, on_epoch=True, prog_bar=True, sync_dist=True)
        self.log('avg_val_acc', avg_val_acc, on_epoch=True, prog_bar=True, sync_dist=True)
        return {'val_loss': avg_loss}

    def train_dataloader(self):
        return DataLoader(FBQADataset(self._hparams['train_dataset']),
                          self._hparams['batch_size'],
                          shuffle=True,
                          collate_fn=fbqa_collate,
                          num_workers=0)

    def val_dataloader(self):
        return DataLoader(FBQADataset(self._hparams['val_dataset']),
                          1,
                          shuffle=False,
                          collate_fn=fbqa_collate,
                          num_workers=0)

    def test_dataloader(self):
        return DataLoader(FBQADataset(self._hparams['test_dataset']),
                          1,
                          shuffle=False,
                          collate_fn=fbqa_collate,
                          num_workers=0)

    def configure_optimizers(self):
        """Build AdamW with a linear warmup/decay schedule stepped every batch.

        Parameter groups: (1) backbone weights with weight decay,
        (2) backbone biases / LayerNorm weights without decay, and, when
        HittER is enabled, (3) the cross-attention modules with their own
        learning rate.
        """
        no_decay = ['bias', 'LayerNorm.weight']
        no_fine_tune = ['cross_attentions']
        backbone = [(n, p) for n, p in self.named_parameters()
                    if not any(i in n for i in no_fine_tune)]
        pgs = [
            # A per-group 'weight_decay' overrides the optimizer-level default,
            # so the configured value has to be used here; a hard-coded 0.01
            # would make the weight_decay hyper-parameter a no-op.
            {'params': [p for n, p in backbone if not any(nd in n for nd in no_decay)],
             'weight_decay': self.weight_decay},
            {'params': [p for n, p in backbone if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
        if self._hparams['use_hitter']:
            pgs.append({'params': self.cross_attentions.parameters(), 'lr': 5e-5, 'weight_decay': 0.01})
        bert_optimizer = AdamW(pgs, lr=self.lr, weight_decay=self.weight_decay)
        bert_scheduler = {
            # Warm up over the first 10% of the steps.
            'scheduler': get_linear_schedule_with_warmup(bert_optimizer, self._hparams['max_steps'] // 10, self._hparams['max_steps']),
            'interval': 'step',
            'monitor': None
        }
        return [bert_optimizer], [bert_scheduler]
if __name__ == '__main__':
    # ---- command line -------------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument("--exp_name", default='default', nargs='?', help="Name of the experiment")
    parser.add_argument('--dataset', choices=['fbqa', 'webqsp'], default='fbqa', help="fbqa or webqsp")
    parser.add_argument('--filtered', default=False, action='store_true', help="Filtered or not")
    parser.add_argument('--hitter', default=False, action='store_true', help="Use pretrained HittER or not")
    parser.add_argument('--relphormer', default=False, action='store_true', help="Use pretrained relphormer or not")
    parser.add_argument('--seed', default=333, type=int, help='Seed number')
    parser.add_argument('--lr', default=3e-5, type=float, help='learning rate')
    parser.add_argument('--weight_decay', default=1e-2, type=float, help='weight decay')
    args = parser.parse_args()
    seed_everything(args.seed)

    # ---- dataset selection --------------------------------------------------
    QA_DATASET = args.dataset
    kg_name = 'relphormer' if args.relphormer else 'fb15k237'
    SUBSET = f'{kg_name}-filtered' if args.filtered else kg_name
    data_root = f'data/{QA_DATASET}/{SUBSET}'

    hparams = {
        'use_hitter': args.hitter,
        'relphormer': args.relphormer,
        'lr': args.lr,
        'weight_decay': args.weight_decay,
        'batch_size': 16,
        'max_epochs': 20,
        'train_dataset': f'{data_root}/train.json',
        # NOTE(review): validation reads the same test.json as the test split.
        'val_dataset': f'{data_root}/test.json',
        'test_dataset': f'{data_root}/test.json',
    }

    # ---- model --------------------------------------------------------------
    if hparams['relphormer']:
        MODEL = "./local/relphormer/"
        config = AutoConfig.from_pretrained(MODEL)
        bertmodel = BertForMaskedLM.from_pretrained(MODEL, config=config)
    else:
        bertmodel = BertForMaskedLM.from_pretrained(MODEL)
    model = CrossTrmFinetuner(hparams, bertmodel=bertmodel)

    # Total optimizer steps; configure_optimizers reads it for the LR schedule.
    steps_per_epoch = len(model.train_dataloader().dataset) // hparams['batch_size'] + 1
    model.hparams['max_steps'] = steps_per_epoch * hparams['max_epochs']

    # ---- logging / checkpointing / training ---------------------------------
    base_path = '/tmp/hitbert-paper'
    logger = TensorBoardLogger(base_path, args.exp_name)
    checkpoint_callback = ModelCheckpoint(
        monitor='avg_val_acc',
        dirpath=base_path + '/' + args.exp_name,
        filename='{epoch:02d}-{avg_val_acc:.3f}',
        save_top_k=1,
        mode='max')
    trainer = pl.Trainer(gpus=1, accelerator="ddp",
                         max_epochs=hparams['max_epochs'], max_steps=model.hparams['max_steps'],
                         checkpoint_callback=True,
                         gradient_clip_val=1.0, logger=logger,
                         callbacks=[LearningRateMonitor(), checkpoint_callback])
    trainer.fit(model)
    print("QA Task End!")
+8
View File
@@ -0,0 +1,8 @@
# from transformers.models.bert.modeling_bert import BertForMaskedLM
from models.huggingface_relformer import BertForMaskedLM
class BertKGC(BertForMaskedLM):
    """Masked-LM model used for the KGC task; adds only its CLI option."""

    @staticmethod
    def add_to_argparse(parser):
        """Register the ``--pretrain`` integer flag (default 0) and return ``parser``."""
        parser.add_argument("--pretrain", type=int, default=0, help="")
        return parser
+10
View File
@@ -0,0 +1,10 @@
# Fine-tune Relphormer on FBQA, once per random seed.
SEEDS="111 222 333 444 555 666 777 888 999"
for SEED in ${SEEDS}; do
    python hitter-bert.py \
        --dataset fbqa \
        --relphormer \
        --seed "${SEED}" \
        --exp_name relphormer-fbqa \
        --lr 3e-5 \
        --weight_decay 1e-2
done
+13
View File
@@ -0,0 +1,13 @@
# Fine-tune Relphormer on the filtered FBQA subset, once per random seed.
SEEDS="111 222 333 444 555 666 777 888 999"
for SEED in ${SEEDS}; do
    python hitter-bert.py \
        --dataset fbqa \
        --relphormer \
        --filtered \
        --seed "${SEED}" \
        --exp_name relphormer-filtered-fbqa \
        --lr 3e-5 \
        --weight_decay 1e-2
done
+10
View File
@@ -0,0 +1,10 @@
# Fine-tune Relphormer on WebQSP, once per random seed.
# NOTE(review): seed 111 is absent from this list although the other launch
# scripts include it - confirm whether that is intentional.
SEEDS="222 333 444 555 666 777 888 999"
for SEED in ${SEEDS}; do
    python hitter-bert.py \
        --dataset webqsp \
        --relphormer \
        --seed "${SEED}" \
        --exp_name relphormer-webqsp \
        --lr 3e-5 \
        --weight_decay 1e-2
done
+12
View File
@@ -0,0 +1,12 @@
# Fine-tune Relphormer on the filtered WebQSP subset, once per random seed.
SEEDS="111 222 333 444 555 666 777 888 999"
for SEED in ${SEEDS}; do
    python hitter-bert.py \
        --dataset webqsp \
        --relphormer \
        --filtered \
        --seed "${SEED}" \
        --exp_name relphormer-filtered-webqsp \
        --lr 3e-5 \
        --weight_decay 1e-2
done
+1159
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,6 @@
{
"#examples": 1639,
"#kept_examples": 484,
"#mappable_examples": 484,
"#multiple_answer_examples": 800
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,6 @@
{
"#examples": 3098,
"#kept_examples": 850,
"#mappable_examples": 850,
"#multiple_answer_examples": 1437
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+6
View File
@@ -0,0 +1,6 @@
{
"#examples": 1639,
"#kept_examples": 1582,
"#mappable_examples": 484,
"#multiple_answer_examples": 800
}
File diff suppressed because it is too large Load Diff
+6
View File
@@ -0,0 +1,6 @@
{
"#examples": 3098,
"#kept_examples": 2997,
"#mappable_examples": 850,
"#multiple_answer_examples": 1437
}
File diff suppressed because it is too large Load Diff
+115
View File
@@ -0,0 +1,115 @@
# Relphormer
Code for the paper: "Relphormer: Relational Graph Transformer for Knowledge Graph Representations".
> Transformers have achieved remarkable performance in widespread fields, including natural language processing, computer vision and graph mining. However, vanilla Transformer architectures have not yielded promising improvements in the Knowledge Graph (KG) representations, where the translational distance paradigm dominates this area. Note that vanilla Transformer architectures struggle to capture the intrinsically heterogeneous semantic and structural information of knowledge graphs. To this end, we propose a new variant of Transformer for knowledge graph representations dubbed Relphormer. Specifically, we introduce Triple2Seq which can dynamically sample contextualized sub-graph sequences as the input to alleviate the heterogeneity issue. We propose a novel structure-enhanced self-attention mechanism to encode the relational information and keep the globally semantic information among sub-graphs. Moreover, we propose masked knowledge modeling as a new paradigm for knowledge graph representation learning. We apply Relphormer to three tasks, namely, knowledge graph completion, KG-based question answering and KG-based recommendation for evaluation. Experimental results show that Relphormer can obtain better performance on benchmark datasets compared with baselines.
# Model Architecture
<div align=center>
<img src="./resource/model.png" width="85%" height="75%" />
</div>
The model architecture of Relphormer.
The contextualized sub-graph is sampled with Triple2Seq, and then it will be converted into sequences while maintaining its sub-graph structure.
Next, we conduct masked knowledge modeling, which randomly masks the nodes in the center triple in the contextualized sub-graph sequences.
For the transformer architecture, we design a novel structure-enhanced mechanism to preserve the structure feature.
Finally, we utilize our pre-trained KG transformer for KG-based downstream tasks.
# Environments
- python (3.8.13)
- cuda(11.2)
- Ubuntu-18.04.6 (4.15.0-156-generic)
# Requirements
To run the code, you need to install the requirements:
```
pip install -r requirements.txt
```
The expected structure of files is:
```
── Relphormer
├── data
├── dataset
│   ├── FB15k-237
│   ├── WN18RR
│   ├── umls
│   ├── create_neighbor.py
├── lit_models
│   ├── __init__.py
│   ├── base.py
│   ├── transformer.py
│   └── utils.py
├── models
│   ├── __init__.py
│   ├── huggingface_relformer.py
│   ├── model.py
│   └── utils.py
├── resource
│   └── model.png
├── scripts
│ ├── fb15k-237
│ ├── wn18rr
│   └── umls
├── QA
├── logs
├── main.py
└── requirements.txt
```
# How to run
## KGC Task
### Generate Masked Neighbors
- Use the command below to generate the masked neighbors.
```shell
>> cd dataset
>> python create_neighbor.py --dataset xxx # like python create_neighbor.py --dataset umls
```
### Entity Embedding Initialization
- Then use the command below to add the entities to BERT and initialize the entity embedding layer that is used in the later training. For the other datasets, `FB15k-237` and `WN18RR`, just replace the dataset name with `fb15k-237` or `wn18rr`.
```shell
>> cd pretrain
>> mkdir logs
>> bash scripts/pretrain_umls.sh
>> tail -f -n 2000 logs/pretrain_umls.log
```
The pretrained models are saved in the `Relphormer/pretrain/output` directory.
### Entity Prediction
- Next use the command below to train the model to predict the correct entity in the masked position. Same as above for other datasets.
```shell
>> cd Relphormer
>> mkdir logs
>> bash scripts/umls/umls.sh
>> tail -f -n 2000 logs/train_umls.log
```
The trained models are saved in the `Relphormer/output` directory.
## QA Task
The experimental settings in QA follow the [Hitter](https://arxiv.org/pdf/2008.12813.pdf) experimental settings, and the environment installation can be done by referring to [GitHub](https://github.com/microsoft/HittER). We only modified **hitter-bert.py** to fit our model.
- The relphormer model used by QA can be downloaded [here](https://drive.google.com/file/d/1FK_A_kFq1ECoNm75RfkcvYv8rZiJL1Bw/view?usp=sharing).
```shell
>> cd QA
>> sh scripts/relphormer_fbqa.sh
>> sh scripts/relphormer_fbqa_filtered.sh
>> sh scripts/relphormer_webqsp.sh
>> sh scripts/relphormer_webqsp_filtered.sh
```
-24
View File
@@ -1,24 +0,0 @@
{
"version": 1,
"disable_existing_loggers": false,
"formatters": {
"simple": {
"format": "%(asctime)s - %(name)s - [%(levelname)s] - %(message)s"
}
},
"handlers": {
"file_handler": {
"class": "logging.FileHandler",
"level": "DEBUG",
"formatter": "simple",
"filename": "python_logging.log",
"encoding": "utf8"
}
},
"root": {
"level": "DEBUG",
"handlers": [
"file_handler"
]
}
}
+2
View File
@@ -0,0 +1,2 @@
from .data_module import KGC
from .processor import convert_examples_to_features, KGProcessor
+63
View File
@@ -0,0 +1,63 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import cython
from cython.parallel cimport prange, parallel
cimport numpy
import numpy
def floyd_warshall(adjacency_matrix):
    """Floyd-Warshall all-pairs shortest paths on a square adjacency matrix.

    Off-diagonal zeros in the input are treated as "no edge".  Returns
    ``(M, path)``: ``M[i, j]`` is the shortest distance (510 when
    unreachable) and ``path[i, j]`` is the last intermediate node ``k`` that
    improved the ``i -> j`` distance (0 if none did, 510 when unreachable).
    """
    (nrows, ncols) = adjacency_matrix.shape
    assert nrows == ncols
    cdef unsigned int n = nrows

    # Work on a C-contiguous copy so the raw row pointers below are valid.
    adj_mat_copy = adjacency_matrix.astype(long, order='C', casting='safe', copy=True)
    assert adj_mat_copy.flags['C_CONTIGUOUS']
    cdef numpy.ndarray[long, ndim=2, mode='c'] M = adj_mat_copy
    cdef numpy.ndarray[long, ndim=2, mode='c'] path = numpy.zeros([n, n], dtype=numpy.int64)

    cdef unsigned int i, j, k
    cdef long M_ij, M_ik, cost_ikkj
    cdef long* M_ptr = &M[0,0]
    cdef long* M_i_ptr
    cdef long* M_k_ptr

    # Zero the diagonal and mark missing edges with the sentinel distance 510.
    for i in range(n):
        for j in range(n):
            if i == j:
                M[i, j] = 0
            elif M[i, j] == 0:
                M[i, j] = 510

    # Floyd-Warshall relaxation: try every node k as an intermediate hop.
    for k in range(n):
        M_k_ptr = M_ptr + n*k
        for i in range(n):
            M_i_ptr = M_ptr + n*i
            M_ik = M_i_ptr[k]
            for j in range(n):
                cost_ikkj = M_ik + M_k_ptr[j]
                M_ij = M_i_ptr[j]
                if M_ij > cost_ikkj:
                    M_i_ptr[j] = cost_ikkj
                    path[i, j] = k

    # Clamp everything still unreachable to the sentinel value 510.
    for i in range(n):
        for j in range(n):
            if M[i, j] >= 510:
                path[i, j] = 510
                M[i, j] = 510

    return M, path
def get_all_edges(path, i, j):
    """Return the intermediate nodes on the ``i -> j`` route recorded in ``path``.

    ``path[i][j] == 0`` is treated as "no intermediate node".
    NOTE(review): 0 is also the initial fill value of ``path`` in
    ``floyd_warshall``, so a hop through node 0 cannot be told apart from a
    direct edge; the unreachable sentinel 510 is not handled here either.
    """
    cdef unsigned int k = path[i][j]
    if k != 0:
        # Expand both halves of the route around the intermediate node k.
        return get_all_edges(path, i, k) + [k] + get_all_edges(path, k, j)
    return []
+71
View File
@@ -0,0 +1,71 @@
"""Base DataModule class."""
from pathlib import Path
from typing import Dict
import argparse
import os
import pytorch_lightning as pl
from torch.utils.data import DataLoader
class Config(dict):
    """Dictionary whose entries can also be read and written as attributes.

    Reading an attribute that is not a key yields ``None`` instead of raising.
    """

    def __getattr__(self, name):
        # Missing keys resolve to None, mirroring dict.get().
        return self[name] if name in self else None

    def __setattr__(self, name, val):
        # Attribute assignment is stored as a regular dictionary item.
        self.__setitem__(name, val)
BATCH_SIZE = 8
NUM_WORKERS = 8
class BaseDataModule(pl.LightningDataModule):
    """
    Base DataModule.

    Stores the parsed command-line arguments as a ``Config`` and builds the
    train/val/test ``DataLoader`` objects from ``self.data_train``,
    ``self.data_val`` and ``self.data_test``, which subclasses assign in
    ``setup``.

    Learn more at https://pytorch-lightning.readthedocs.io/en/stable/datamodules.html
    """

    def __init__(self, args: argparse.Namespace = None) -> None:
        super().__init__()
        # Always wrap in Config so attribute-style access (self.args.foo)
        # also works when no arguments were supplied.
        self.args = Config(vars(args)) if args is not None else Config()
        self.batch_size = self.args.get("batch_size", BATCH_SIZE)
        self.num_workers = self.args.get("num_workers", NUM_WORKERS)

    @staticmethod
    def add_to_argparse(parser):
        """Register the data-loading options on ``parser`` and return it."""
        parser.add_argument(
            "--batch_size", type=int, default=BATCH_SIZE, help="Number of examples to operate on per forward step."
        )
        # NOTE(review): the CLI default is 0 while the NUM_WORKERS fallback used
        # in __init__ is 8 - confirm which one is intended.
        parser.add_argument(
            "--num_workers", type=int, default=0, help="Number of additional processes to load data."
        )
        parser.add_argument(
            "--dataset", type=str, default="./dataset/NELL", help="Path to the dataset directory."
        )
        return parser

    def prepare_data(self):
        """
        Use this method to do things that might write to disk or that need to be done only from a single GPU in distributed settings (so don't set state `self.x = y`).
        """
        pass

    def setup(self, stage=None):
        """
        Split into train, val, test, and set dims.
        Should assign `torch Dataset` objects to self.data_train, self.data_val, and optionally self.data_test.
        """
        self.data_train = None
        self.data_val = None
        self.data_test = None

    def train_dataloader(self):
        return DataLoader(self.data_train, shuffle=True, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True)

    def val_dataloader(self):
        return DataLoader(self.data_val, shuffle=False, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True)

    def test_dataloader(self):
        return DataLoader(self.data_test, shuffle=False, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True)
+198
View File
@@ -0,0 +1,198 @@
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, NewType, Optional, Tuple, Union
from enum import Enum
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, BertTokenizer
# from transformers.configuration_bert import BertTokenizer, BertTokenizerFast
from transformers.tokenization_utils_base import (BatchEncoding,
PreTrainedTokenizerBase)
from .base_data_module import BaseDataModule
from .processor import KGProcessor, get_dataset, getNegativeEntityId
import transformers
transformers.logging.set_verbosity_error()
class ExplicitEnum(Enum):
    """
    Enum base class whose failed value lookups name the accepted values.
    """

    @classmethod
    def _missing_(cls, value):
        # Called by Enum when `value` matches no member; raise a descriptive error.
        message = f"{value} is not a valid {cls.__name__}, please select one of {list(cls._value2member_map_.keys())}"
        raise ValueError(message)
class PaddingStrategy(ExplicitEnum):
    """
    Accepted values for the ``padding`` argument of
    :meth:`PreTrainedTokenizerBase.__call__`; handy for IDE tab-completion.
    """

    LONGEST = "longest"
    MAX_LENGTH = "max_length"
    DO_NOT_PAD = "do_not_pad"
import numpy as np
@dataclass
class DataCollatorForSeq2Seq:
    """
    Data collator that pads the tokenizer inputs and turns the per-example
    ``labels`` into a multi-hot matrix over ``num_labels`` classes.

    Args:
        tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`):
            The tokenizer used for encoding the data; its ``pad`` method is called on the features.
        model (:class:`~transformers.PreTrainedModel`):
            The model that is being trained. Stored only; ``__call__`` does not use it.
        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.file_utils.PaddingStrategy`, `optional`, defaults to :obj:`True`):
            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
            among:

            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
              sequence is provided).
            * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
              maximum acceptable input length for the model if that argument is not provided.
            * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
              different lengths).
        max_length (:obj:`int`, `optional`):
            Maximum length of the returned list and optionally padding length (see above).
        pad_to_multiple_of (:obj:`int`, `optional`):
            If set will pad the sequence to a multiple of the provided value.
        label_pad_token_id (:obj:`int`, `optional`, defaults to -100):
            Stored only; ``__call__`` does not use it.
        return_tensors (:obj:`str`, defaults to ``"pt"``):
            Tensor type requested from ``tokenizer.pad`` when ``__call__`` is not given one.
        num_labels (:obj:`int`):
            Width of the multi-hot label matrix.
        args:
            Passed to ``getNegativeEntityId`` to obtain the id that marks a negative sample.
    """

    tokenizer: PreTrainedTokenizerBase
    model: Optional[Any] = None
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    label_pad_token_id: int = -100
    return_tensors: str = "pt"
    num_labels: int = 0
    args: Any = None

    def __call__(self, features, return_tensors=None):
        """Collate ``features`` (a list of dicts) into one padded batch.

        The feature dicts are modified in place: every key other than
        ``input_ids``, ``attention_mask`` and ``token_type_ids`` is popped.

        Raises:
            ValueError: if the features carry no ``labels`` entry.
        """
        if return_tensors is None:
            return_tensors = self.return_tensors

        # Fail early with a clear message, before any feature is mutated,
        # instead of crashing later on ``len(None)``.
        if "labels" not in features[0]:
            raise ValueError("DataCollatorForSeq2Seq requires a 'labels' entry in every feature")
        labels = [feature.pop("labels") for feature in features]
        label = [feature.pop("label") for feature in features]

        # Collect every remaining non-tokenizer key so tokenizer.pad only sees
        # the inputs it knows how to pad.
        features_keys = {}
        name_keys = list(features[0].keys())
        for k in name_keys:
            # ignore the padding arguments
            if k in ["input_ids", "attention_mask", "token_type_ids"]:
                continue
            try:
                features_keys[k] = [feature.pop(k) for feature in features]
            except KeyError:
                continue

        # Build the multi-hot label matrix: one row per example.
        bsz = len(labels)
        new_labels = torch.zeros(bsz, self.num_labels)
        negative_id = None  # looked up lazily, at most once per batch
        for i, l in enumerate(labels):
            if isinstance(l, int):
                new_labels[i][l] = 1
                continue
            if len(l) == 0:
                # No positive entity: leave the row all-zero.
                continue
            if negative_id is None:
                negative_id = getNegativeEntityId(self.args)
            # A leading negative-entity id marks a negative sample, whose row
            # stays all-zero.
            if l[0] != negative_id:
                for j in l:
                    new_labels[i][j] = 1
        labels = new_labels

        features = self.tokenizer.pad(
            features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors=return_tensors,
        )
        features['labels'] = labels
        features['label'] = torch.tensor(label)
        features.update(features_keys)

        return features
class KGC(BaseDataModule):
    """
    Data module for knowledge-graph completion.

    Loads a tokenizer, registers every entity and relation of the dataset as an additional
    special token, builds the batch collator and exposes train/val/test dataloaders.
    """

    def __init__(self, args, model) -> None:
        """
        Args:
            args: Parsed command-line namespace; ``model_name_or_path``, ``data_dir``,
                ``precision`` and ``max_seq_length`` are read here.
            model: Forwarded to the collator (``DataCollatorForSeq2Seq``).
        """
        super().__init__(args)
        self.tokenizer = AutoTokenizer.from_pretrained(self.args.model_name_or_path, use_fast=False)
        self.processor = KGProcessor(self.tokenizer, args)
        self.label_list = self.processor.get_labels(args.data_dir)

        # Entities are added to the vocabulary first, then relations (below).
        entity_list = self.processor.get_entities(args.data_dir)
        self.tokenizer.add_special_tokens({'additional_special_tokens': entity_list})

        # The collator keeps a reference to the tokenizer, so tokens added later are visible to it.
        self.sampler = DataCollatorForSeq2Seq(
            self.tokenizer,
            model=model,
            label_pad_token_id=self.tokenizer.pad_token_id,
            pad_to_multiple_of=8 if self.args.precision == 16 else None,
            padding="longest",
            max_length=self.args.max_seq_length,
            num_labels=len(entity_list),
            args=args,
        )

        relations_tokens = self.processor.get_relations(args.data_dir)
        self.num_relations = len(relations_tokens)
        self.tokenizer.add_special_tokens({'additional_special_tokens': relations_tokens})

        # Half-open id ranges [st, ed) of the added tokens.
        # NOTE(review): assumes the tokenizer assigned consecutive ids in list order -- confirm.
        vocab = self.tokenizer.get_added_vocab()
        self.relation_id_st = vocab[relations_tokens[0]]
        self.relation_id_ed = vocab[relations_tokens[-1]] + 1
        self.entity_id_st = vocab[entity_list[0]]
        self.entity_id_ed = vocab[entity_list[-1]] + 1

    def setup(self, stage=None):
        """Build the train, dev and test datasets with ``get_dataset``."""
        self.data_train = get_dataset(self.args, self.processor, self.label_list, self.tokenizer, "train")
        self.data_val = get_dataset(self.args, self.processor, self.label_list, self.tokenizer, "dev")
        self.data_test = get_dataset(self.args, self.processor, self.label_list, self.tokenizer, "test")

    def prepare_data(self):
        """No preparation step is needed."""
        pass

    def get_config(self):
        """Return a dict of the instance attributes whose name contains ``"st"`` or ``"ed"``."""
        # NOTE(review): this is a substring match. It picks up the `*_id_st` / `*_id_ed` ranges
        # set in __init__, but also any other matching attribute (e.g. `data_test` once
        # setup() has run). Confirm what callers read before tightening it.
        return {k: v for k, v in self.__dict__.items() if "st" in k or "ed" in k}

    @staticmethod
    def add_to_argparse(parser):
        """Register the data-module command-line options on ``parser`` and return it."""
        BaseDataModule.add_to_argparse(parser)
        parser.add_argument("--model_name_or_path", type=str, default="roberta-base", help="the name or the path to the pretrained model")
        # NOTE(review): the default looks copy-pasted from --model_name_or_path; it is kept
        # unchanged for backward compatibility, so callers should always pass --data_dir.
        parser.add_argument("--data_dir", type=str, default="roberta-base", help="the path to the dataset directory")
        parser.add_argument("--max_seq_length", type=int, default=256, help="Maximum input sequence length.")
        # The flag is spelled "radio" (sic); the name is kept so existing scripts keep working.
        parser.add_argument("--warm_up_radio", type=float, default=0.1, help="Warm-up ratio.")
        parser.add_argument("--eval_batch_size", type=int, default=8)
        parser.add_argument("--overwrite_cache", action="store_true", default=False)
        return parser

    def get_tokenizer(self):
        """Return the tokenizer, including the added entity and relation tokens."""
        return self.tokenizer

    def train_dataloader(self):
        """Return the training dataloader; shuffling is disabled when ``args.faiss_init`` is truthy."""
        return DataLoader(
            self.data_train,
            num_workers=self.num_workers,
            pin_memory=True,
            collate_fn=self.sampler,
            batch_size=self.args.batch_size,
            shuffle=not self.args.faiss_init,
        )

    def val_dataloader(self):
        """Return the validation dataloader (batch size ``args.eval_batch_size``)."""
        return DataLoader(
            self.data_val,
            num_workers=self.num_workers,
            pin_memory=True,
            collate_fn=self.sampler,
            batch_size=self.args.eval_batch_size,
        )

    def test_dataloader(self):
        """Return the test dataloader (batch size ``args.eval_batch_size``)."""
        return DataLoader(
            self.data_test,
            num_workers=self.num_workers,
            pin_memory=True,
            collate_fn=self.sampler,
            batch_size=self.args.eval_batch_size,
        )
-15
View File
@@ -1,15 +0,0 @@
# triples: 89320
# entities: 7128
# relations: 12409
# timesteps: 208
# test triples: 8255
# valid triples: 8239
# train triples: 72826
Measure method: N/A
Target Size : 0
Grow Factor: 0
Shrink Factor: 0
Epsilon Factor: 0
Search method: N/A
filter_dupes: inter
nonames: False
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-209
View File
@@ -1,209 +0,0 @@
0 0 2
1 3 5
2 6 7
3 8 9
4 10 12
5 13 14
6 15 16
7 17 19
8 20 21
9 22 23
10 24 26
11 27 28
12 29 30
13 31 33
14 34 35
15 36 37
16 38 40
17 41 42
18 43 44
19 45 46
20 47 48
21 49 49
22 50 50
23 51 51
24 52 53
25 54 54
26 55 55
27 56 57
28 58 59
29 60 61
30 62 62
31 63 63
32 64 65
33 66 68
34 69 70
35 71 71
36 72 72
37 73 74
38 75 76
39 77 78
40 79 80
41 81 82
42 83 84
43 85 85
44 86 87
45 88 89
46 90 91
47 92 93
48 94 96
49 97 97
50 98 99
51 100 101
52 102 103
53 104 105
54 106 107
55 108 110
56 111 112
57 113 114
58 115 116
59 117 118
60 119 119
61 120 121
62 122 124
63 125 125
64 126 127
65 128 129
66 130 131
67 132 133
68 134 135
69 136 138
70 139 139
71 140 140
72 141 141
73 142 143
74 144 145
75 146 147
76 148 148
77 149 150
78 151 152
79 153 154
80 155 155
81 156 157
82 158 159
83 160 161
84 162 163
85 164 166
86 167 167
87 168 168
88 169 169
89 170 170
90 171 173
91 174 175
92 176 177
93 178 180
94 181 182
95 183 183
96 184 185
97 186 187
98 188 188
99 189 190
100 191 192
101 193 194
102 195 195
103 196 197
104 198 199
105 200 201
106 202 203
107 204 205
108 206 208
109 209 210
110 211 212
111 213 215
112 216 217
113 218 219
114 220 221
115 222 222
116 223 224
117 225 226
118 227 229
119 230 231
120 232 233
121 234 236
122 237 238
123 239 239
124 240 241
125 242 243
126 244 245
127 246 246
128 247 248
129 249 250
130 251 251
131 252 252
132 253 253
133 254 254
134 255 256
135 257 257
136 258 259
137 260 261
138 262 263
139 264 264
140 265 265
141 266 266
142 267 267
143 268 269
144 270 271
145 272 272
146 273 273
147 274 274
148 275 276
149 277 278
150 279 279
151 280 281
152 282 283
153 284 285
154 286 286
155 287 287
156 288 288
157 289 289
158 290 291
159 292 292
160 293 293
161 294 294
162 295 295
163 296 297
164 298 299
165 300 300
166 301 301
167 302 303
168 304 305
169 306 307
170 308 309
171 310 310
172 311 312
173 313 313
174 314 314
175 315 315
176 316 316
177 317 317
178 318 319
179 320 320
180 321 321
181 322 322
182 323 323
183 324 324
184 325 326
185 327 327
186 328 328
187 329 329
188 330 330
189 331 332
190 333 334
191 335 335
192 336 336
193 337 338
194 339 340
195 341 342
196 343 343
197 344 344
198 345 346
199 347 348
200 349 349
201 350 350
202 351 352
203 353 355
204 356 357
205 358 359
206 360 362
207 363 365
208 366 366
-72826
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-15
View File
@@ -1,15 +0,0 @@
# triples: 86517
# entities: 7128
# relations: 12409
# timesteps: 208
# test triples: 8218
# valid triples: 8193
# train triples: 70106
Measure method: N/A
Target Size : 0
Grow Factor: 0
Shrink Factor: 0
Epsilon Factor: 0
Search method: N/A
filter_dupes: both
nonames: False
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-209
View File
@@ -1,209 +0,0 @@
0 0 2
1 3 5
2 6 7
3 8 9
4 10 12
5 13 14
6 15 16
7 17 19
8 20 21
9 22 23
10 24 26
11 27 28
12 29 30
13 31 33
14 34 35
15 36 37
16 38 40
17 41 42
18 43 44
19 45 46
20 47 48
21 49 49
22 50 50
23 51 51
24 52 53
25 54 54
26 55 55
27 56 57
28 58 59
29 60 61
30 62 62
31 63 63
32 64 65
33 66 68
34 69 70
35 71 71
36 72 72
37 73 74
38 75 76
39 77 78
40 79 80
41 81 82
42 83 84
43 85 85
44 86 87
45 88 89
46 90 91
47 92 93
48 94 96
49 97 97
50 98 99
51 100 101
52 102 103
53 104 105
54 106 107
55 108 110
56 111 112
57 113 114
58 115 116
59 117 118
60 119 119
61 120 121
62 122 124
63 125 125
64 126 127
65 128 129
66 130 131
67 132 133
68 134 135
69 136 138
70 139 139
71 140 140
72 141 141
73 142 143
74 144 145
75 146 147
76 148 148
77 149 150
78 151 152
79 153 154
80 155 155
81 156 157
82 158 159
83 160 161
84 162 163
85 164 166
86 167 167
87 168 168
88 169 169
89 170 170
90 171 173
91 174 175
92 176 177
93 178 180
94 181 182
95 183 183
96 184 185
97 186 187
98 188 188
99 189 190
100 191 192
101 193 194
102 195 195
103 196 197
104 198 199
105 200 201
106 202 203
107 204 205
108 206 208
109 209 210
110 211 212
111 213 215
112 216 217
113 218 219
114 220 221
115 222 222
116 223 224
117 225 226
118 227 229
119 230 231
120 232 233
121 234 236
122 237 238
123 239 239
124 240 241
125 242 243
126 244 245
127 246 246
128 247 248
129 249 250
130 251 251
131 252 252
132 253 253
133 254 254
134 255 256
135 257 257
136 258 259
137 260 261
138 262 263
139 264 264
140 265 265
141 266 266
142 267 267
143 268 269
144 270 271
145 272 272
146 273 273
147 274 274
148 275 276
149 277 278
150 279 279
151 280 281
152 282 283
153 284 285
154 286 286
155 287 287
156 288 288
157 289 289
158 290 291
159 292 292
160 293 293
161 294 294
162 295 295
163 296 297
164 298 299
165 300 300
166 301 301
167 302 303
168 304 305
169 306 307
170 308 309
171 310 310
172 311 312
173 313 313
174 314 314
175 315 315
176 316 316
177 317 317
178 318 319
179 320 320
180 321 321
181 322 322
182 323 323
183 324 324
184 325 326
185 327 327
186 328 328
187 329 329
188 330 330
189 331 332
190 333 334
191 335 335
192 336 336
193 337 338
194 339 340
195 341 342
196 343 343
197 344 344
198 345 346
199 347 348
200 349 349
201 350 350
202 351 352
203 353 355
204 356 357
205 358 359
206 360 362
207 363 365
208 366 366
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+1159
View File
File diff suppressed because it is too large Load Diff
-15
View File
@@ -1,15 +0,0 @@
# triples: 291818
# entities: 12554
# relations: 423
# timesteps: 70
# test triples: 19271
# valid triples: 20208
# train triples: 252339
Measure method: N/A
Target Size : 423
Grow Factor: 0
Shrink Factor: 4.0
Epsilon Factor: 0
Search method: N/A
filter_dupes: inter
nonames: False
File diff suppressed because it is too large Load Diff
-423
View File
@@ -1,423 +0,0 @@
0 P131[0-0]
1 P131[1-1]
2 P131[2-2]
3 P131[3-3]
4 P131[4-4]
5 P131[5-5]
6 P131[6-6]
7 P131[7-7]
8 P131[8-8]
9 P131[9-9]
10 P131[10-10]
11 P131[11-11]
12 P131[12-12]
13 P131[13-13]
14 P131[14-14]
15 P131[15-15]
16 P131[16-16]
17 P131[17-17]
18 P131[18-18]
19 P131[19-19]
20 P131[20-20]
21 P131[21-21]
22 P131[22-22]
23 P131[23-23]
24 P131[24-24]
25 P131[25-25]
26 P131[26-26]
27 P131[27-27]
28 P131[28-28]
29 P131[29-29]
30 P131[30-30]
31 P131[31-31]
32 P131[32-32]
33 P131[33-33]
34 P131[34-34]
35 P131[35-35]
36 P131[36-36]
37 P131[37-37]
38 P131[38-38]
39 P131[39-39]
40 P131[40-40]
41 P131[41-41]
42 P131[42-42]
43 P131[43-43]
44 P131[44-44]
45 P131[45-45]
46 P131[46-46]
47 P131[47-47]
48 P131[48-48]
49 P131[49-49]
50 P131[50-50]
51 P131[51-51]
52 P131[52-52]
53 P131[53-53]
54 P131[54-54]
55 P131[55-55]
56 P131[56-56]
57 P131[57-57]
58 P131[58-58]
59 P131[59-59]
60 P131[60-60]
61 P131[61-61]
62 P131[62-62]
63 P131[63-63]
64 P131[64-64]
65 P131[65-65]
66 P131[66-66]
67 P131[67-67]
68 P131[68-68]
69 P131[69-69]
70 P1435[65-65]
71 P39[49-49]
72 P39[50-50]
73 P39[51-51]
74 P39[52-52]
75 P39[53-53]
76 P39[54-54]
77 P39[55-55]
78 P39[56-56]
79 P39[57-57]
80 P39[58-58]
81 P39[59-59]
82 P39[60-60]
83 P39[61-61]
84 P39[62-62]
85 P39[63-63]
86 P39[64-64]
87 P39[65-65]
88 P39[66-66]
89 P39[67-67]
90 P39[68-68]
91 P39[69-69]
92 P54[40-40]
93 P54[41-41]
94 P54[42-42]
95 P54[43-43]
96 P54[44-44]
97 P54[45-45]
98 P54[46-46]
99 P54[47-47]
100 P54[48-48]
101 P54[49-49]
102 P54[50-50]
103 P54[51-51]
104 P54[52-52]
105 P54[53-53]
106 P54[54-54]
107 P54[55-55]
108 P54[56-56]
109 P54[57-57]
110 P54[58-58]
111 P54[59-59]
112 P54[60-60]
113 P54[61-61]
114 P54[62-62]
115 P54[63-63]
116 P54[64-64]
117 P54[65-65]
118 P54[66-66]
119 P54[67-67]
120 P54[68-68]
121 P54[69-69]
122 P31[0-0]
123 P31[1-1]
124 P31[2-2]
125 P31[3-3]
126 P31[4-4]
127 P31[5-5]
128 P31[6-6]
129 P31[7-7]
130 P31[8-8]
131 P31[9-9]
132 P31[10-10]
133 P31[11-11]
134 P31[12-12]
135 P31[13-13]
136 P31[14-14]
137 P31[15-15]
138 P31[16-16]
139 P31[17-17]
140 P31[18-18]
141 P31[19-19]
142 P31[20-20]
143 P31[21-21]
144 P31[22-22]
145 P31[23-23]
146 P31[24-24]
147 P31[25-25]
148 P31[26-26]
149 P31[27-27]
150 P31[28-28]
151 P31[29-29]
152 P31[30-30]
153 P31[31-31]
154 P31[32-32]
155 P31[33-33]
156 P31[34-34]
157 P31[35-35]
158 P31[36-36]
159 P31[37-37]
160 P31[38-38]
161 P31[39-39]
162 P31[40-40]
163 P31[41-41]
164 P31[42-42]
165 P31[43-43]
166 P31[44-44]
167 P31[45-45]
168 P31[46-46]
169 P31[47-47]
170 P31[48-48]
171 P31[49-49]
172 P31[50-50]
173 P31[51-51]
174 P31[52-52]
175 P31[53-53]
176 P31[54-54]
177 P31[55-55]
178 P31[56-56]
179 P31[57-57]
180 P31[58-58]
181 P31[59-59]
182 P31[60-60]
183 P31[61-61]
184 P31[62-62]
185 P31[63-63]
186 P31[64-64]
187 P31[65-65]
188 P31[66-66]
189 P31[67-67]
190 P31[68-68]
191 P31[69-69]
192 P463[26-26]
193 P463[27-27]
194 P463[28-28]
195 P463[29-29]
196 P463[30-30]
197 P463[31-31]
198 P463[32-32]
199 P463[33-33]
200 P463[34-34]
201 P463[35-35]
202 P463[36-36]
203 P463[37-37]
204 P463[38-38]
205 P463[39-39]
206 P463[40-40]
207 P463[41-41]
208 P463[42-42]
209 P463[43-43]
210 P463[44-44]
211 P463[45-45]
212 P463[46-46]
213 P463[47-47]
214 P463[48-48]
215 P463[49-49]
216 P463[50-50]
217 P463[51-51]
218 P463[52-52]
219 P463[53-53]
220 P463[54-54]
221 P463[55-55]
222 P463[56-56]
223 P463[57-57]
224 P463[58-58]
225 P463[59-59]
226 P463[60-60]
227 P463[61-61]
228 P463[62-62]
229 P463[63-63]
230 P463[64-64]
231 P463[65-65]
232 P463[66-66]
233 P463[67-67]
234 P463[68-68]
235 P463[69-69]
236 P512[4-69]
237 P190[0-29]
238 P150[0-3]
239 P1376[39-47]
240 P463[0-7]
241 P166[0-7]
242 P2962[18-30]
243 P108[29-36]
244 P39[0-3]
245 P17[47-48]
246 P166[21-23]
247 P793[46-69]
248 P69[32-41]
249 P17[57-58]
250 P190[42-45]
251 P2962[39-42]
252 P54[0-18]
253 P26[56-61]
254 P150[14-17]
255 P463[16-17]
256 P26[39-46]
257 P579[36-43]
258 P579[16-23]
259 P2962[59-60]
260 P1411[59-61]
261 P26[20-27]
262 P6[4-69]
263 P1435[33-34]
264 P166[52-53]
265 P108[49-57]
266 P150[10-13]
267 P1346[47-68]
268 P150[18-21]
269 P1346[13-46]
270 P69[20-23]
271 P39[31-32]
272 P1411[32-37]
273 P166[62-63]
274 P150[44-47]
275 P2962[61-62]
276 P150[48-51]
277 P150[52-55]
278 P1411[62-67]
279 P1435[35-36]
280 P1411[48-51]
281 P150[22-25]
282 P2962[63-64]
283 P2962[65-66]
284 P166[58-59]
285 P190[46-49]
286 P54[34-35]
287 P1435[4-16]
288 P463[18-19]
289 P150[31-34]
290 P150[35-38]
291 P39[35-36]
292 P26[62-69]
293 P1411[56-58]
294 P1435[37-38]
295 P166[60-61]
296 P39[33-34]
297 P102[24-31]
298 P2962[43-46]
299 P108[37-48]
300 P190[50-53]
301 P39[4-6]
302 P1435[39-40]
303 P793[0-45]
304 P150[64-69]
305 P39[19-22]
306 P27[30-38]
307 P2962[31-38]
308 P1411[24-31]
309 P102[40-45]
310 P39[37-38]
311 P463[8-11]
312 P1435[41-42]
313 P27[52-59]
314 P69[16-19]
315 P17[16-18]
316 P190[54-57]
317 P1435[43-44]
318 P166[8-15]
319 P166[45-47]
320 P2962[47-50]
321 P39[39-40]
322 P1411[52-55]
323 P108[58-69]
324 P463[20-21]
325 P39[41-42]
326 P150[26-30]
327 P150[39-43]
328 P1435[45-46]
329 P26[28-38]
330 P54[27-30]
331 P190[58-61]
332 P17[59-61]
333 P54[36-37]
334 P166[16-20]
335 P166[37-40]
336 P1435[47-48]
337 P17[0-3]
338 P26[47-55]
339 P1435[49-50]
340 P1435[25-28]
341 P150[4-9]
342 P102[63-69]
343 P26[0-19]
344 P1435[17-24]
345 P39[23-26]
346 P1435[51-52]
347 P39[7-11]
348 P69[12-15]
349 P69[24-31]
350 P102[0-23]
351 P39[43-44]
352 P579[24-35]
353 P190[62-65]
354 P1435[53-54]
355 P1376[0-18]
356 P27[0-14]
357 P463[12-15]
358 P166[33-36]
359 P102[32-39]
360 P17[4-7]
361 P190[30-41]
362 P166[24-28]
363 P190[66-69]
364 P69[42-69]
365 P1435[55-56]
366 P54[31-33]
367 P39[45-46]
368 P17[12-15]
369 P1435[57-58]
370 P54[19-26]
371 P2962[51-54]
372 P2962[67-69]
373 P1435[59-60]
374 P579[44-56]
375 P1435[61-62]
376 P166[41-44]
377 P17[19-22]
378 P1376[19-38]
379 P17[23-26]
380 P1376[48-69]
381 P463[22-23]
382 P17[27-30]
383 P1435[63-64]
384 P69[0-3]
385 P1435[66-67]
386 P17[35-38]
387 P69[8-11]
388 P1435[68-69]
389 P17[31-34]
390 P102[46-53]
391 P27[60-69]
392 P579[57-69]
393 P69[4-7]
394 P1411[7-14]
395 P551[0-35]
396 P108[0-28]
397 P17[8-11]
398 P1411[38-47]
399 P17[43-46]
400 P17[49-52]
401 P166[64-69]
402 P1435[29-32]
403 P54[38-39]
404 P39[27-30]
405 P2962[55-58]
406 P463[24-25]
407 P17[39-42]
408 P17[53-56]
409 P17[66-69]
410 P17[62-65]
411 P1411[15-23]
412 P166[48-51]
413 P27[15-29]
414 P150[56-63]
415 P27[39-51]
416 P39[47-48]
417 P166[29-32]
418 P39[12-18]
419 P166[54-57]
420 P551[36-69]
421 P579[0-15]
422 P102[54-62]
File diff suppressed because it is too large Load Diff
-71
View File
@@ -1,71 +0,0 @@
0 19 19
1 20 1643
2 1644 1790
3 1791 1816
4 1817 1855
5 1856 1871
6 1872 1893
7 1894 1905
8 1906 1913
9 1914 1918
10 1919 1920
11 1921 1924
12 1925 1929
13 1930 1933
14 1934 1937
15 1938 1941
16 1942 1945
17 1946 1948
18 1949 1950
19 1951 1953
20 1954 1956
21 1957 1959
22 1960 1961
23 1962 1963
24 1964 1965
25 1966 1967
26 1968 1968
27 1969 1970
28 1971 1972
29 1973 1974
30 1975 1976
31 1977 1978
32 1979 1980
33 1981 1982
34 1983 1983
35 1984 1984
36 1985 1985
37 1986 1986
38 1987 1987
39 1988 1988
40 1989 1989
41 1990 1990
42 1991 1991
43 1992 1992
44 1993 1993
45 1994 1994
46 1995 1995
47 1996 1996
48 1997 1997
49 1998 1998
50 1999 1999
51 2000 2000
52 2001 2001
53 2002 2002
54 2003 2003
55 2004 2004
56 2005 2005
57 2006 2006
58 2007 2007
59 2008 2008
60 2009 2009
61 2010 2010
62 2011 2011
63 2012 2012
64 2013 2013
65 2014 2014
66 2015 2015
67 2016 2016
68 2017 2017
69 2018 2020
70 2021 2021
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-15
View File
@@ -1,15 +0,0 @@
# triples: 231529
# entities: 12554
# relations: 423
# timesteps: 70
# test triples: 16195
# valid triples: 16707
# train triples: 198627
Measure method: N/A
Target Size : 423
Grow Factor: 0
Shrink Factor: 4.0
Epsilon Factor: 0
Search method: N/A
filter_dupes: both
nonames: False
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-423
View File
@@ -1,423 +0,0 @@
0 P131[0-0]
1 P131[1-1]
2 P131[2-2]
3 P131[3-3]
4 P131[4-4]
5 P131[5-5]
6 P131[6-6]
7 P131[7-7]
8 P131[8-8]
9 P131[9-9]
10 P131[10-10]
11 P131[11-11]
12 P131[12-12]
13 P131[13-13]
14 P131[14-14]
15 P131[15-15]
16 P131[16-16]
17 P131[17-17]
18 P131[18-18]
19 P131[19-19]
20 P131[20-20]
21 P131[21-21]
22 P131[22-22]
23 P131[23-23]
24 P131[24-24]
25 P131[25-25]
26 P131[26-26]
27 P131[27-27]
28 P131[28-28]
29 P131[29-29]
30 P131[30-30]
31 P131[31-31]
32 P131[32-32]
33 P131[33-33]
34 P131[34-34]
35 P131[35-35]
36 P131[36-36]
37 P131[37-37]
38 P131[38-38]
39 P131[39-39]
40 P131[40-40]
41 P131[41-41]
42 P131[42-42]
43 P131[43-43]
44 P131[44-44]
45 P131[45-45]
46 P131[46-46]
47 P131[47-47]
48 P131[48-48]
49 P131[49-49]
50 P131[50-50]
51 P131[51-51]
52 P131[52-52]
53 P131[53-53]
54 P131[54-54]
55 P131[55-55]
56 P131[56-56]
57 P131[57-57]
58 P131[58-58]
59 P131[59-59]
60 P131[60-60]
61 P131[61-61]
62 P131[62-62]
63 P131[63-63]
64 P131[64-64]
65 P131[65-65]
66 P131[66-66]
67 P131[67-67]
68 P131[68-68]
69 P131[69-69]
70 P1435[65-65]
71 P39[49-49]
72 P39[50-50]
73 P39[51-51]
74 P39[52-52]
75 P39[53-53]
76 P39[54-54]
77 P39[55-55]
78 P39[56-56]
79 P39[57-57]
80 P39[58-58]
81 P39[59-59]
82 P39[60-60]
83 P39[61-61]
84 P39[62-62]
85 P39[63-63]
86 P39[64-64]
87 P39[65-65]
88 P39[66-66]
89 P39[67-67]
90 P39[68-68]
91 P39[69-69]
92 P54[40-40]
93 P54[41-41]
94 P54[42-42]
95 P54[43-43]
96 P54[44-44]
97 P54[45-45]
98 P54[46-46]
99 P54[47-47]
100 P54[48-48]
101 P54[49-49]
102 P54[50-50]
103 P54[51-51]
104 P54[52-52]
105 P54[53-53]
106 P54[54-54]
107 P54[55-55]
108 P54[56-56]
109 P54[57-57]
110 P54[58-58]
111 P54[59-59]
112 P54[60-60]
113 P54[61-61]
114 P54[62-62]
115 P54[63-63]
116 P54[64-64]
117 P54[65-65]
118 P54[66-66]
119 P54[67-67]
120 P54[68-68]
121 P54[69-69]
122 P31[0-0]
123 P31[1-1]
124 P31[2-2]
125 P31[3-3]
126 P31[4-4]
127 P31[5-5]
128 P31[6-6]
129 P31[7-7]
130 P31[8-8]
131 P31[9-9]
132 P31[10-10]
133 P31[11-11]
134 P31[12-12]
135 P31[13-13]
136 P31[14-14]
137 P31[15-15]
138 P31[16-16]
139 P31[17-17]
140 P31[18-18]
141 P31[19-19]
142 P31[20-20]
143 P31[21-21]
144 P31[22-22]
145 P31[23-23]
146 P31[24-24]
147 P31[25-25]
148 P31[26-26]
149 P31[27-27]
150 P31[28-28]
151 P31[29-29]
152 P31[30-30]
153 P31[31-31]
154 P31[32-32]
155 P31[33-33]
156 P31[34-34]
157 P31[35-35]
158 P31[36-36]
159 P31[37-37]
160 P31[38-38]
161 P31[39-39]
162 P31[40-40]
163 P31[41-41]
164 P31[42-42]
165 P31[43-43]
166 P31[44-44]
167 P31[45-45]
168 P31[46-46]
169 P31[47-47]
170 P31[48-48]
171 P31[49-49]
172 P31[50-50]
173 P31[51-51]
174 P31[52-52]
175 P31[53-53]
176 P31[54-54]
177 P31[55-55]
178 P31[56-56]
179 P31[57-57]
180 P31[58-58]
181 P31[59-59]
182 P31[60-60]
183 P31[61-61]
184 P31[62-62]
185 P31[63-63]
186 P31[64-64]
187 P31[65-65]
188 P31[66-66]
189 P31[67-67]
190 P31[68-68]
191 P31[69-69]
192 P463[26-26]
193 P463[27-27]
194 P463[28-28]
195 P463[29-29]
196 P463[30-30]
197 P463[31-31]
198 P463[32-32]
199 P463[33-33]
200 P463[34-34]
201 P463[35-35]
202 P463[36-36]
203 P463[37-37]
204 P463[38-38]
205 P463[39-39]
206 P463[40-40]
207 P463[41-41]
208 P463[42-42]
209 P463[43-43]
210 P463[44-44]
211 P463[45-45]
212 P463[46-46]
213 P463[47-47]
214 P463[48-48]
215 P463[49-49]
216 P463[50-50]
217 P463[51-51]
218 P463[52-52]
219 P463[53-53]
220 P463[54-54]
221 P463[55-55]
222 P463[56-56]
223 P463[57-57]
224 P463[58-58]
225 P463[59-59]
226 P463[60-60]
227 P463[61-61]
228 P463[62-62]
229 P463[63-63]
230 P463[64-64]
231 P463[65-65]
232 P463[66-66]
233 P463[67-67]
234 P463[68-68]
235 P463[69-69]
236 P512[4-69]
237 P190[0-29]
238 P150[0-3]
239 P1376[39-47]
240 P463[0-7]
241 P166[0-7]
242 P2962[18-30]
243 P108[29-36]
244 P39[0-3]
245 P17[47-48]
246 P166[21-23]
247 P793[46-69]
248 P69[32-41]
249 P17[57-58]
250 P190[42-45]
251 P2962[39-42]
252 P54[0-18]
253 P26[56-61]
254 P150[14-17]
255 P463[16-17]
256 P26[39-46]
257 P579[36-43]
258 P579[16-23]
259 P2962[59-60]
260 P1411[59-61]
261 P26[20-27]
262 P6[4-69]
263 P1435[33-34]
264 P166[52-53]
265 P108[49-57]
266 P150[10-13]
267 P1346[47-68]
268 P150[18-21]
269 P1346[13-46]
270 P69[20-23]
271 P39[31-32]
272 P1411[32-37]
273 P166[62-63]
274 P150[44-47]
275 P2962[61-62]
276 P150[48-51]
277 P150[52-55]
278 P1411[62-67]
279 P1435[35-36]
280 P1411[48-51]
281 P150[22-25]
282 P2962[63-64]
283 P2962[65-66]
284 P166[58-59]
285 P190[46-49]
286 P54[34-35]
287 P1435[4-16]
288 P463[18-19]
289 P150[31-34]
290 P150[35-38]
291 P39[35-36]
292 P26[62-69]
293 P1411[56-58]
294 P1435[37-38]
295 P166[60-61]
296 P39[33-34]
297 P102[24-31]
298 P2962[43-46]
299 P108[37-48]
300 P190[50-53]
301 P39[4-6]
302 P1435[39-40]
303 P793[0-45]
304 P150[64-69]
305 P39[19-22]
306 P27[30-38]
307 P2962[31-38]
308 P1411[24-31]
309 P102[40-45]
310 P39[37-38]
311 P463[8-11]
312 P1435[41-42]
313 P27[52-59]
314 P69[16-19]
315 P17[16-18]
316 P190[54-57]
317 P1435[43-44]
318 P166[8-15]
319 P166[45-47]
320 P2962[47-50]
321 P39[39-40]
322 P1411[52-55]
323 P108[58-69]
324 P463[20-21]
325 P39[41-42]
326 P150[26-30]
327 P150[39-43]
328 P1435[45-46]
329 P26[28-38]
330 P54[27-30]
331 P190[58-61]
332 P17[59-61]
333 P54[36-37]
334 P166[16-20]
335 P166[37-40]
336 P1435[47-48]
337 P17[0-3]
338 P26[47-55]
339 P1435[49-50]
340 P1435[25-28]
341 P150[4-9]
342 P102[63-69]
343 P26[0-19]
344 P1435[17-24]
345 P39[23-26]
346 P1435[51-52]
347 P39[7-11]
348 P69[12-15]
349 P69[24-31]
350 P102[0-23]
351 P39[43-44]
352 P579[24-35]
353 P190[62-65]
354 P1435[53-54]
355 P1376[0-18]
356 P27[0-14]
357 P463[12-15]
358 P166[33-36]
359 P102[32-39]
360 P17[4-7]
361 P190[30-41]
362 P166[24-28]
363 P190[66-69]
364 P69[42-69]
365 P1435[55-56]
366 P54[31-33]
367 P39[45-46]
368 P17[12-15]
369 P1435[57-58]
370 P54[19-26]
371 P2962[51-54]
372 P2962[67-69]
373 P1435[59-60]
374 P579[44-56]
375 P1435[61-62]
376 P166[41-44]
377 P17[19-22]
378 P1376[19-38]
379 P17[23-26]
380 P1376[48-69]
381 P463[22-23]
382 P17[27-30]
383 P1435[63-64]
384 P69[0-3]
385 P1435[66-67]
386 P17[35-38]
387 P69[8-11]
388 P1435[68-69]
389 P17[31-34]
390 P102[46-53]
391 P27[60-69]
392 P579[57-69]
393 P69[4-7]
394 P1411[7-14]
395 P551[0-35]
396 P108[0-28]
397 P17[8-11]
398 P1411[38-47]
399 P17[43-46]
400 P17[49-52]
401 P166[64-69]
402 P1435[29-32]
403 P54[38-39]
404 P39[27-30]
405 P2962[55-58]
406 P463[24-25]
407 P17[39-42]
408 P17[53-56]
409 P17[66-69]
410 P17[62-65]
411 P1411[15-23]
412 P166[48-51]
413 P27[15-29]
414 P150[56-63]
415 P27[39-51]
416 P39[47-48]
417 P166[29-32]
418 P39[12-18]
419 P166[54-57]
420 P551[36-69]
421 P579[0-15]
422 P102[54-62]
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-71
View File
@@ -1,71 +0,0 @@
0 19 19
1 20 1643
2 1644 1790
3 1791 1816
4 1817 1855
5 1856 1871
6 1872 1893
7 1894 1905
8 1906 1913
9 1914 1918
10 1919 1920
11 1921 1924
12 1925 1929
13 1930 1933
14 1934 1937
15 1938 1941
16 1942 1945
17 1946 1948
18 1949 1950
19 1951 1953
20 1954 1956
21 1957 1959
22 1960 1961
23 1962 1963
24 1964 1965
25 1966 1967
26 1968 1968
27 1969 1970
28 1971 1972
29 1973 1974
30 1975 1976
31 1977 1978
32 1979 1980
33 1981 1982
34 1983 1983
35 1984 1984
36 1985 1985
37 1986 1986
38 1987 1987
39 1988 1988
40 1989 1989
41 1990 1990
42 1991 1991
43 1992 1992
44 1993 1993
45 1994 1994
46 1995 1995
47 1996 1996
48 1997 1997
49 1998 1998
50 1999 1999
51 2000 2000
52 2001 2001
53 2002 2002
54 2003 2003
55 2004 2004
56 2005 2005
57 2006 2006
58 2007 2007
59 2008 2008
60 2009 2009
61 2010 2010
62 2011 2011
63 2012 2012
64 2013 2013
65 2014 2014
66 2015 2015
67 2016 2016
68 2017 2017
69 2018 2020
70 2021 2021
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-15
View File
@@ -1,15 +0,0 @@
# triples: 78032
# entities: 10526
# relations: 177
# timesteps: 46
# test triples: 6909
# valid triples: 7198
# train triples: 63925
Measure method: N/A
Target Size : 0
Grow Factor: 0
Shrink Factor: 0
Epsilon Factor: 5.0
Search method: N/A
filter_dupes: inter
nonames: False
File diff suppressed because it is too large Load Diff
-177
View File
@@ -1,177 +0,0 @@
0 <wasBornIn>[0-2]
1 <wasBornIn>[2-5]
2 <wasBornIn>[5-7]
3 <wasBornIn>[7-10]
4 <wasBornIn>[10-12]
5 <wasBornIn>[12-15]
6 <wasBornIn>[15-17]
7 <wasBornIn>[17-20]
8 <wasBornIn>[20-22]
9 <wasBornIn>[22-25]
10 <wasBornIn>[25-27]
11 <wasBornIn>[27-30]
12 <wasBornIn>[30-32]
13 <wasBornIn>[32-35]
14 <wasBornIn>[35-45]
15 <wasBornIn>[52-52]
16 <diedIn>[0-3]
17 <diedIn>[3-5]
18 <diedIn>[5-7]
19 <diedIn>[7-10]
20 <diedIn>[10-12]
21 <diedIn>[12-14]
22 <diedIn>[14-17]
23 <diedIn>[17-19]
24 <diedIn>[19-21]
25 <diedIn>[21-23]
26 <diedIn>[23-25]
27 <diedIn>[25-27]
28 <diedIn>[27-29]
29 <diedIn>[29-32]
30 <diedIn>[32-34]
31 <diedIn>[34-36]
32 <diedIn>[36-38]
33 <diedIn>[38-40]
34 <diedIn>[40-42]
35 <diedIn>[42-44]
36 <diedIn>[44-47]
37 <diedIn>[47-49]
38 <diedIn>[49-51]
39 <diedIn>[51-53]
40 <diedIn>[53-55]
41 <diedIn>[55-57]
42 <diedIn>[59-59]
43 <worksAt>[0-3]
44 <worksAt>[3-5]
45 <worksAt>[5-7]
46 <worksAt>[7-10]
47 <worksAt>[10-12]
48 <worksAt>[12-14]
49 <worksAt>[14-17]
50 <worksAt>[17-19]
51 <worksAt>[19-21]
52 <worksAt>[21-23]
53 <worksAt>[23-25]
54 <worksAt>[25-27]
55 <worksAt>[27-29]
56 <worksAt>[29-32]
57 <worksAt>[32-34]
58 <worksAt>[34-36]
59 <worksAt>[36-40]
60 <worksAt>[40-42]
61 <worksAt>[42-47]
62 <worksAt>[47-53]
63 <worksAt>[59-59]
64 <playsFor>[0-3]
65 <playsFor>[3-5]
66 <playsFor>[5-23]
67 <playsFor>[23-25]
68 <playsFor>[25-27]
69 <playsFor>[27-29]
70 <playsFor>[29-32]
71 <playsFor>[32-34]
72 <playsFor>[34-36]
73 <playsFor>[36-38]
74 <playsFor>[38-40]
75 <playsFor>[40-42]
76 <playsFor>[42-44]
77 <playsFor>[44-47]
78 <playsFor>[47-51]
79 <playsFor>[59-59]
80 <hasWonPrize>[1-4]
81 <hasWonPrize>[4-6]
82 <hasWonPrize>[6-8]
83 <hasWonPrize>[8-11]
84 <hasWonPrize>[11-15]
85 <hasWonPrize>[15-18]
86 <hasWonPrize>[18-22]
87 <hasWonPrize>[22-26]
88 <hasWonPrize>[26-30]
89 <hasWonPrize>[30-33]
90 <hasWonPrize>[33-37]
91 <hasWonPrize>[37-47]
92 <hasWonPrize>[47-53]
93 <hasWonPrize>[59-59]
94 <isMarriedTo>[0-3]
95 <isMarriedTo>[3-5]
96 <isMarriedTo>[5-7]
97 <isMarriedTo>[7-10]
98 <isMarriedTo>[10-12]
99 <isMarriedTo>[12-14]
100 <isMarriedTo>[14-17]
101 <isMarriedTo>[17-19]
102 <isMarriedTo>[19-21]
103 <isMarriedTo>[21-23]
104 <isMarriedTo>[23-25]
105 <isMarriedTo>[25-27]
106 <isMarriedTo>[27-29]
107 <isMarriedTo>[29-32]
108 <isMarriedTo>[32-34]
109 <isMarriedTo>[34-38]
110 <isMarriedTo>[38-42]
111 <isMarriedTo>[42-47]
112 <isMarriedTo>[47-51]
113 <isMarriedTo>[51-55]
114 <isMarriedTo>[59-59]
115 <owns>[0-10]
116 <owns>[10-17]
117 <owns>[17-19]
118 <owns>[19-23]
119 <owns>[23-36]
120 <owns>[36-38]
121 <owns>[59-59]
122 <graduatedFrom>[0-3]
123 <graduatedFrom>[3-5]
124 <graduatedFrom>[5-7]
125 <graduatedFrom>[7-10]
126 <graduatedFrom>[10-14]
127 <graduatedFrom>[14-17]
128 <graduatedFrom>[17-19]
129 <graduatedFrom>[19-21]
130 <graduatedFrom>[21-23]
131 <graduatedFrom>[23-27]
132 <graduatedFrom>[27-32]
133 <graduatedFrom>[32-34]
134 <graduatedFrom>[34-38]
135 <graduatedFrom>[38-42]
136 <graduatedFrom>[59-59]
137 <isAffiliatedTo>[1-4]
138 <isAffiliatedTo>[4-6]
139 <isAffiliatedTo>[6-8]
140 <isAffiliatedTo>[8-11]
141 <isAffiliatedTo>[11-13]
142 <isAffiliatedTo>[13-15]
143 <isAffiliatedTo>[15-18]
144 <isAffiliatedTo>[18-20]
145 <isAffiliatedTo>[20-22]
146 <isAffiliatedTo>[22-24]
147 <isAffiliatedTo>[24-26]
148 <isAffiliatedTo>[26-28]
149 <isAffiliatedTo>[28-30]
150 <isAffiliatedTo>[30-33]
151 <isAffiliatedTo>[33-35]
152 <isAffiliatedTo>[35-37]
153 <isAffiliatedTo>[37-40]
154 <isAffiliatedTo>[40-42]
155 <isAffiliatedTo>[42-44]
156 <isAffiliatedTo>[44-47]
157 <isAffiliatedTo>[47-49]
158 <isAffiliatedTo>[49-51]
159 <isAffiliatedTo>[51-53]
160 <isAffiliatedTo>[53-55]
161 <isAffiliatedTo>[55-57]
162 <isAffiliatedTo>[59-59]
163 <created>[0-3]
164 <created>[3-5]
165 <created>[5-10]
166 <created>[10-12]
167 <created>[12-17]
168 <created>[17-19]
169 <created>[19-25]
170 <created>[25-29]
171 <created>[29-32]
172 <created>[32-36]
173 <created>[36-42]
174 <created>[42-47]
175 <created>[47-53]
176 <created>[59-59]
File diff suppressed because it is too large Load Diff
-60
View File
@@ -1,60 +0,0 @@
0 -431 1782
1 1783 1848
2 1849 1870
3 1871 1888
4 1889 1899
5 1900 1906
6 1907 1912
7 1913 1917
8 1918 1922
9 1923 1926
10 1927 1930
11 1931 1934
12 1935 1938
13 1939 1941
14 1942 1944
15 1945 1947
16 1948 1950
17 1951 1953
18 1954 1956
19 1957 1959
20 1960 1962
21 1963 1965
22 1966 1967
23 1968 1969
24 1970 1971
25 1972 1973
26 1974 1975
27 1976 1977
28 1978 1979
29 1980 1981
30 1982 1983
31 1984 1985
32 1986 1987
33 1988 1989
34 1990 1991
35 1992 1993
36 1994 1994
37 1995 1996
38 1997 1997
39 1998 1998
40 1999 1999
41 2000 2000
42 2001 2001
43 2002 2002
44 2003 2003
45 2004 2004
46 2005 2005
47 2006 2006
48 2007 2007
49 2008 2008
50 2009 2009
51 2010 2010
52 2011 2011
53 2012 2012
54 2013 2013
55 2014 2014
56 2015 2015
57 2016 2016
58 2017 2017
59 2018 2018
-63925
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-15
View File
@@ -1,15 +0,0 @@
# triples: 78032
# entities: 10526
# relations: 177
# timesteps: 46
# test triples: 6909
# valid triples: 7198
# train triples: 63925
Measure method: N/A
Target Size : 0
Grow Factor: 0
Shrink Factor: 0
Epsilon Factor: 5.0
Search method: N/A
filter_dupes: both
nonames: False
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-177
View File
@@ -1,177 +0,0 @@
0 <wasBornIn>[0-2]
1 <wasBornIn>[2-5]
2 <wasBornIn>[5-7]
3 <wasBornIn>[7-10]
4 <wasBornIn>[10-12]
5 <wasBornIn>[12-15]
6 <wasBornIn>[15-17]
7 <wasBornIn>[17-20]
8 <wasBornIn>[20-22]
9 <wasBornIn>[22-25]
10 <wasBornIn>[25-27]
11 <wasBornIn>[27-30]
12 <wasBornIn>[30-32]
13 <wasBornIn>[32-35]
14 <wasBornIn>[35-45]
15 <wasBornIn>[52-52]
16 <diedIn>[0-3]
17 <diedIn>[3-5]
18 <diedIn>[5-7]
19 <diedIn>[7-10]
20 <diedIn>[10-12]
21 <diedIn>[12-14]
22 <diedIn>[14-17]
23 <diedIn>[17-19]
24 <diedIn>[19-21]
25 <diedIn>[21-23]
26 <diedIn>[23-25]
27 <diedIn>[25-27]
28 <diedIn>[27-29]
29 <diedIn>[29-32]
30 <diedIn>[32-34]
31 <diedIn>[34-36]
32 <diedIn>[36-38]
33 <diedIn>[38-40]
34 <diedIn>[40-42]
35 <diedIn>[42-44]
36 <diedIn>[44-47]
37 <diedIn>[47-49]
38 <diedIn>[49-51]
39 <diedIn>[51-53]
40 <diedIn>[53-55]
41 <diedIn>[55-57]
42 <diedIn>[59-59]
43 <worksAt>[0-3]
44 <worksAt>[3-5]
45 <worksAt>[5-7]
46 <worksAt>[7-10]
47 <worksAt>[10-12]
48 <worksAt>[12-14]
49 <worksAt>[14-17]
50 <worksAt>[17-19]
51 <worksAt>[19-21]
52 <worksAt>[21-23]
53 <worksAt>[23-25]
54 <worksAt>[25-27]
55 <worksAt>[27-29]
56 <worksAt>[29-32]
57 <worksAt>[32-34]
58 <worksAt>[34-36]
59 <worksAt>[36-40]
60 <worksAt>[40-42]
61 <worksAt>[42-47]
62 <worksAt>[47-53]
63 <worksAt>[59-59]
64 <playsFor>[0-3]
65 <playsFor>[3-5]
66 <playsFor>[5-23]
67 <playsFor>[23-25]
68 <playsFor>[25-27]
69 <playsFor>[27-29]
70 <playsFor>[29-32]
71 <playsFor>[32-34]
72 <playsFor>[34-36]
73 <playsFor>[36-38]
74 <playsFor>[38-40]
75 <playsFor>[40-42]
76 <playsFor>[42-44]
77 <playsFor>[44-47]
78 <playsFor>[47-51]
79 <playsFor>[59-59]
80 <hasWonPrize>[1-4]
81 <hasWonPrize>[4-6]
82 <hasWonPrize>[6-8]
83 <hasWonPrize>[8-11]
84 <hasWonPrize>[11-15]
85 <hasWonPrize>[15-18]
86 <hasWonPrize>[18-22]
87 <hasWonPrize>[22-26]
88 <hasWonPrize>[26-30]
89 <hasWonPrize>[30-33]
90 <hasWonPrize>[33-37]
91 <hasWonPrize>[37-47]
92 <hasWonPrize>[47-53]
93 <hasWonPrize>[59-59]
94 <isMarriedTo>[0-3]
95 <isMarriedTo>[3-5]
96 <isMarriedTo>[5-7]
97 <isMarriedTo>[7-10]
98 <isMarriedTo>[10-12]
99 <isMarriedTo>[12-14]
100 <isMarriedTo>[14-17]
101 <isMarriedTo>[17-19]
102 <isMarriedTo>[19-21]
103 <isMarriedTo>[21-23]
104 <isMarriedTo>[23-25]
105 <isMarriedTo>[25-27]
106 <isMarriedTo>[27-29]
107 <isMarriedTo>[29-32]
108 <isMarriedTo>[32-34]
109 <isMarriedTo>[34-38]
110 <isMarriedTo>[38-42]
111 <isMarriedTo>[42-47]
112 <isMarriedTo>[47-51]
113 <isMarriedTo>[51-55]
114 <isMarriedTo>[59-59]
115 <owns>[0-10]
116 <owns>[10-17]
117 <owns>[17-19]
118 <owns>[19-23]
119 <owns>[23-36]
120 <owns>[36-38]
121 <owns>[59-59]
122 <graduatedFrom>[0-3]
123 <graduatedFrom>[3-5]
124 <graduatedFrom>[5-7]
125 <graduatedFrom>[7-10]
126 <graduatedFrom>[10-14]
127 <graduatedFrom>[14-17]
128 <graduatedFrom>[17-19]
129 <graduatedFrom>[19-21]
130 <graduatedFrom>[21-23]
131 <graduatedFrom>[23-27]
132 <graduatedFrom>[27-32]
133 <graduatedFrom>[32-34]
134 <graduatedFrom>[34-38]
135 <graduatedFrom>[38-42]
136 <graduatedFrom>[59-59]
137 <isAffiliatedTo>[1-4]
138 <isAffiliatedTo>[4-6]
139 <isAffiliatedTo>[6-8]
140 <isAffiliatedTo>[8-11]
141 <isAffiliatedTo>[11-13]
142 <isAffiliatedTo>[13-15]
143 <isAffiliatedTo>[15-18]
144 <isAffiliatedTo>[18-20]
145 <isAffiliatedTo>[20-22]
146 <isAffiliatedTo>[22-24]
147 <isAffiliatedTo>[24-26]
148 <isAffiliatedTo>[26-28]
149 <isAffiliatedTo>[28-30]
150 <isAffiliatedTo>[30-33]
151 <isAffiliatedTo>[33-35]
152 <isAffiliatedTo>[35-37]
153 <isAffiliatedTo>[37-40]
154 <isAffiliatedTo>[40-42]
155 <isAffiliatedTo>[42-44]
156 <isAffiliatedTo>[44-47]
157 <isAffiliatedTo>[47-49]
158 <isAffiliatedTo>[49-51]
159 <isAffiliatedTo>[51-53]
160 <isAffiliatedTo>[53-55]
161 <isAffiliatedTo>[55-57]
162 <isAffiliatedTo>[59-59]
163 <created>[0-3]
164 <created>[3-5]
165 <created>[5-10]
166 <created>[10-12]
167 <created>[12-17]
168 <created>[17-19]
169 <created>[19-25]
170 <created>[25-29]
171 <created>[29-32]
172 <created>[32-36]
173 <created>[36-42]
174 <created>[42-47]
175 <created>[47-53]
176 <created>[59-59]
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-60
View File
@@ -1,60 +0,0 @@
0 -431 1782
1 1783 1848
2 1849 1870
3 1871 1888
4 1889 1899
5 1900 1906
6 1907 1912
7 1913 1917
8 1918 1922
9 1923 1926
10 1927 1930
11 1931 1934
12 1935 1938
13 1939 1941
14 1942 1944
15 1945 1947
16 1948 1950
17 1951 1953
18 1954 1956
19 1957 1959
20 1960 1962
21 1963 1965
22 1966 1967
23 1968 1969
24 1970 1971
25 1972 1973
26 1974 1975
27 1976 1977
28 1978 1979
29 1980 1981
30 1982 1983
31 1984 1985
32 1986 1987
33 1988 1989
34 1990 1991
35 1992 1993
36 1994 1994
37 1995 1996
38 1997 1997
39 1998 1998
40 1999 1999
41 2000 2000
42 2001 2001
43 2002 2002
44 2003 2003
45 2004 2004
46 2005 2005
47 2006 2006
48 2007 2007
49 2008 2008
50 2009 2009
51 2010 2010
52 2011 2011
53 2012 2012
54 2013 2013
55 2014 2014
56 2015 2015
57 2016 2016
58 2017 2017
59 2018 2018
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-136
View File
@@ -1,136 +0,0 @@
import torch
import numpy as np
from torch.utils.data import Dataset
class TrainDataset(Dataset):
    """
    Training Dataset class.

    Parameters
    ----------
    triples: The triples used for training the model. Each element is read
        through the keys 'triple', 'label' and 'sub_samp'.
    params: Parameters for the experiments. The attributes read by this class
        are train_strategy, num_ent, neg_num and lbl_smooth.

    Returns
    -------
    A training Dataset class instance used by DataLoader
    """

    def __init__(self, triples, params):
        self.triples = triples
        self.p = params
        self.strategy = self.p.train_strategy
        # Pool of entity ids from which negative samples are drawn.
        self.entities = np.arange(self.p.num_ent, dtype=np.int32)

    def __len__(self):
        return len(self.triples)

    def __getitem__(self, idx):
        """
        Build one training sample.

        Returns
        -------
        (triple, trp_label, None, None) for the 'one_to_n' strategy, or
        (triple, trp_label, neg_ent, sub_samp) for the 'one_to_x' strategy.

        Raises
        ------
        NotImplementedError: if the strategy is neither of the two above.
        """
        ele = self.triples[idx]
        triple = torch.LongTensor(ele['triple'])
        label = np.int32(ele['label'])
        sub_samp = np.float32(ele['sub_samp'])
        trp_label = self.get_label(label)

        if self.p.lbl_smooth != 0.0:
            # Smoothing term is 1/num_ent regardless of the label length.
            trp_label = (1.0 - self.p.lbl_smooth) * \
                trp_label + (1.0 / self.p.num_ent)

        if self.strategy == 'one_to_n':
            return triple, trp_label, None, None
        if self.strategy == 'one_to_x':
            sub_samp = torch.FloatTensor([sub_samp])
            neg_ent = torch.LongTensor(self.get_neg_ent(triple, label))
            return triple, trp_label, neg_ent, sub_samp
        raise NotImplementedError

    @staticmethod
    def collate_fn(data):
        """
        Stack a list of samples produced by __getitem__ into batch tensors.

        Returns a 4-tuple (triple, trp_label, neg_ent, sub_samp) when the
        samples carry negative entities (one_to_x), otherwise the 2-tuple
        (triple, trp_label).
        """
        triple = torch.stack([_[0] for _ in data], dim=0)
        trp_label = torch.stack([_[1] for _ in data], dim=0)
        # The first sample decides the layout of the whole batch.
        if data[0][2] is not None:  # one_to_x
            neg_ent = torch.stack([_[2] for _ in data], dim=0)
            sub_samp = torch.cat([_[3] for _ in data], dim=0)
            return triple, trp_label, neg_ent, sub_samp
        return triple, trp_label

    def get_neg_ent(self, triple, label):
        """
        Sample candidate entities with the positive(s) placed first.

        For 'one_to_x' the result is the object of `triple` followed by
        `neg_num` sampled negatives (neg_num + 1 entries). For any other
        strategy the result is all of `label` followed by enough negatives
        to reach `neg_num` entries in total. Entities listed in `label` are
        never drawn as negatives, and negatives are drawn without
        replacement.
        """
        # Builtin bool: the np.bool alias was removed in NumPy 1.24.
        mask = np.ones([self.p.num_ent], dtype=bool)
        mask[label] = False

        if self.strategy == 'one_to_x':
            pos_obj = triple[2]
            num_neg = self.p.neg_num
        else:
            pos_obj = label
            num_neg = self.p.neg_num - len(label)

        neg_ent = np.int32(np.random.choice(
            self.entities[mask], num_neg, replace=False)).reshape([-1])
        # np.asarray accepts both the torch scalar (one_to_x) and the numpy
        # label array, so the positives can be prepended uniformly.
        # The former pdb breakpoint guarding len(result) > neg_num was dropped:
        # in the branch it belonged to, the length is exactly neg_num.
        return np.concatenate((np.asarray(pos_obj).reshape([-1]), neg_ent))

    def get_label(self, label):
        """
        Build the float target vector for one sample.

        'one_to_n' gives a multi-hot vector of length num_ent with ones at
        the indices in `label`; 'one_to_x' gives [1, 0, ..., 0] of length
        neg_num + 1 (the positive comes first, matching get_neg_ent).

        Raises
        ------
        NotImplementedError: for any other strategy.
        """
        if self.strategy == 'one_to_n':
            y = np.zeros([self.p.num_ent], dtype=np.float32)
            for e2 in label:
                y[e2] = 1.0
        elif self.strategy == 'one_to_x':
            y = [1] + [0] * self.p.neg_num
        else:
            raise NotImplementedError
        return torch.FloatTensor(y)
class TestDataset(Dataset):
    """
    Evaluation Dataset class.

    Parameters
    ----------
    triples: The triples used for evaluating the model. Each element is read
        through the keys 'triple' and 'label'.
    params: Parameters for the experiments. Only num_ent is read here.

    Returns
    -------
    An evaluation Dataset class instance used by DataLoader for model evaluation
    """

    def __init__(self, triples, params):
        self.p = params
        self.triples = triples

    def __len__(self):
        return len(self.triples)

    def __getitem__(self, idx):
        """Return (triple tensor, multi-hot label tensor) for sample `idx`."""
        record = self.triples[idx]
        triple_ids = torch.LongTensor(record['triple'])
        positives = np.int32(record['label'])
        return triple_ids, self.get_label(positives)

    @staticmethod
    def collate_fn(data):
        """Stack the per-sample triples and labels along a new batch axis."""
        triple_batch = torch.stack([sample[0] for sample in data], dim=0)
        label_batch = torch.stack([sample[1] for sample in data], dim=0)
        return triple_batch, label_batch

    def get_label(self, label):
        """Return a float vector of length num_ent with 1.0 at each index in `label`."""
        multi_hot = np.zeros([self.p.num_ent], dtype=np.float32)
        for ent_id in label:
            multi_hot[ent_id] = 1.0
        return torch.FloatTensor(multi_hot)
@@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
Can't render this file because it is too large.
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
View File

Some files were not shown because too many files have changed in this diff Show More