Compare commits
25 commits: main...tourier_sp

Author | SHA1 | Date
---|---|---
 | 45ce0c995b |
 | 53443a4026 |
 | c050d8f9d9 |
 | f2052c2839 |
 | ddbaa2781f |
 | e7e42739e2 |
 | dcb11706aa |
 | bbbac51e4c |
 | 14001cf80d |
 | 835213ab1b |
 | bb6d06a903 |
 | ff855256e0 |
 | d912e0a225 |
 | 9d182abadb |
 | e5a343b0c5 |
 | a7fb599368 |
 | 03b38c7e99 |
 | 87f45862d6 |
 | 96b67658f4 |
 | 410e725bf2 |
 | c8e8d8a660 |
 | 9aa85307af |
 | dfec7ff331 |
 | 5f1518cfd9 |
 | 54e6fbc84c |
LICENSE (21 lines)
@@ -1,21 +0,0 @@
MIT License

Copyright (c) 2021 ZJUNLP

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,6 +0,0 @@


from transformers import BartForConditionalGeneration, T5ForConditionalGeneration, GPT2LMHeadModel

from .model import *
@@ -1,6 +0,0 @@
{
    "#examples": 3994,
    "#kept_examples": 3994,
    "#mappable_examples": 743,
    "#multiple_answer_examples": 2
}

File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,6 +0,0 @@
{
    "#examples": 3996,
    "#kept_examples": 3996,
    "#mappable_examples": 755,
    "#multiple_answer_examples": 0
}

File diff suppressed because it is too large
@@ -1,6 +0,0 @@
{
    "#examples": 20358,
    "#kept_examples": 20358,
    "#mappable_examples": 3713,
    "#multiple_answer_examples": 4
}

File diff suppressed because it is too large
@@ -1,6 +0,0 @@
{
    "#examples": 3994,
    "#kept_examples": 3994,
    "#mappable_examples": 743,
    "#multiple_answer_examples": 2
}

File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,6 +0,0 @@
{
    "#examples": 3996,
    "#kept_examples": 3996,
    "#mappable_examples": 755,
    "#multiple_answer_examples": 0
}

File diff suppressed because it is too large
@@ -1,6 +0,0 @@
{
    "#examples": 20358,
    "#kept_examples": 20358,
    "#mappable_examples": 3713,
    "#multiple_answer_examples": 4
}

File diff suppressed because it is too large
@@ -1,403 +0,0 @@
|
||||
import json
|
||||
import math
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from transformers import BertTokenizer, BertForMaskedLM, AdamW, get_linear_schedule_with_warmup, AutoConfig
|
||||
|
||||
import torch
|
||||
from torch import device, nn
|
||||
from torch.utils.data import DataLoader, Dataset
|
||||
|
||||
import pytorch_lightning as pl
|
||||
from pytorch_lightning.loggers import TensorBoardLogger
|
||||
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
|
||||
from pytorch_lightning.utilities.seed import seed_everything
|
||||
from transformers.tokenization_bert import BertTokenizerFast
|
||||
|
||||
from kge.model import KgeModel
|
||||
from kge.util.io import load_checkpoint
|
||||
from kge.util import sc
|
||||
# from relphormer.lit_models import TransformerLitModel
|
||||
from relphormer.models import BertKGC
|
||||
# from relphormer.data import KGC
|
||||
import os
|
||||
|
||||
os.environ['CUDA_VISIBLE_DEVICES']='4'
|
||||
|
||||
MODEL = 'bert-base-uncased'
|
||||
tokenizer = BertTokenizer.from_pretrained(MODEL)
|
||||
|
||||
|
||||
class FBQADataset(Dataset):
|
||||
|
||||
def __init__(self, file_dir):
|
||||
self.examples = json.load(Path(file_dir).open("rb"))
|
||||
|
||||
def __len__(self):
|
||||
return len(self.examples)
|
||||
|
||||
def __getitem__(self, idx):
|
||||
if torch.is_tensor(idx):
|
||||
idx = idx.tolist()
|
||||
return self.examples[idx]
|
||||
|
||||
|
||||
def fbqa_collate(samples):
|
||||
questions = []
|
||||
answers = []
|
||||
answer_ids = []
|
||||
entities = []
|
||||
entity_names = []
|
||||
relations = []
|
||||
for item in samples:
|
||||
q = item["RawQuestion"] + "[MASK]" * len(item["AnswerEntity"]) + "."
|
||||
questions.append(q)
|
||||
answers.append(item["AnswerEntity"])
|
||||
answer_ids.append(item["AnswerEntityID"])
|
||||
entities.append(item["TopicEntityID"])
|
||||
entity_names.append(item["TopicEntityName"])
|
||||
relations.append(item["RelationID"])
|
||||
|
||||
questions = tokenizer(questions, return_tensors='pt', padding=True)
|
||||
entity_names = tokenizer(entity_names, add_special_tokens=False)
|
||||
answers, answers_lengths = sc.pad_seq_of_seq(answers)
|
||||
answers = torch.LongTensor(answers)
|
||||
answers_lengths = torch.LongTensor(answers_lengths)
|
||||
answer_ids = torch.LongTensor(answer_ids)
|
||||
|
||||
input_ids = questions['input_ids']
|
||||
masked_labels = torch.ones_like(input_ids) * -100
|
||||
masked_labels[input_ids == tokenizer.mask_token_id] = answers[answers != 0]
|
||||
entity_mask = torch.zeros_like(input_ids).bool()
|
||||
entity_span_index = input_ids.new_zeros((len(input_ids), 2))
|
||||
for i, e_tokens in enumerate(entity_names['input_ids']):
|
||||
q_tokens = input_ids[i].tolist()
|
||||
for s_index in range(len(q_tokens) - len(e_tokens)):
|
||||
if all([e_token == q_tokens[s_index + j] for j, e_token in enumerate(e_tokens)]):
|
||||
entity_mask[i][s_index:s_index + len(e_tokens)] = True
|
||||
entity_span_index[i][0] = s_index
|
||||
entity_span_index[i][1] = s_index + len(e_tokens) - 1
|
||||
break
|
||||
|
||||
entities = torch.LongTensor(entities)
|
||||
relations = torch.LongTensor(relations)
|
||||
|
||||
return questions.data, masked_labels, answers, answers_lengths, answer_ids, entities, relations, entity_mask, entity_span_index
|
||||
|
||||
|
||||
class SelfOutput(nn.Module):
|
||||
def __init__(self, config):
|
||||
super().__init__()
|
||||
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
|
||||
self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
|
||||
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
||||
|
||||
def forward(self, hidden_states, input_tensor):
|
||||
hidden_states = self.dense(hidden_states)
|
||||
hidden_states = self.dropout(hidden_states)
|
||||
hidden_states = self.LayerNorm(hidden_states + input_tensor)
|
||||
return hidden_states
|
||||
|
||||
|
||||
class CrossAttention(nn.Module):
|
||||
def __init__(self, config, ctx_hidden_size):
|
||||
super().__init__()
|
||||
self.self = CrossAttentionInternal(config, ctx_hidden_size)
|
||||
self.output = SelfOutput(config)
|
||||
self.config = config
|
||||
self.apply(self._init_weights)
|
||||
|
||||
def _init_weights(self, module):
|
||||
""" Initialize the weights """
|
||||
if isinstance(module, (nn.Linear, nn.Embedding)):
|
||||
# Slightly different from the TF version which uses truncated_normal for initialization
|
||||
# cf https://github.com/pytorch/pytorch/pull/5617
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
elif isinstance(module, nn.LayerNorm):
|
||||
module.bias.data.zero_()
|
||||
module.weight.data.fill_(1.0)
|
||||
if isinstance(module, nn.Linear) and module.bias is not None:
|
||||
module.bias.data.zero_()
|
||||
|
||||
def forward(
|
||||
self,
|
||||
hidden_states,
|
||||
attention_mask=None,
|
||||
head_mask=None,
|
||||
encoder_hidden_states=None,
|
||||
encoder_attention_mask=None,
|
||||
output_attentions=False,
|
||||
):
|
||||
self_outputs = self.self(
|
||||
hidden_states,
|
||||
attention_mask,
|
||||
head_mask,
|
||||
encoder_hidden_states,
|
||||
encoder_attention_mask,
|
||||
output_attentions,
|
||||
)
|
||||
attention_output = self.output(self_outputs[0], hidden_states)
|
||||
outputs = (attention_output,) + self_outputs[1:] # add attentions if we output them
|
||||
return outputs
|
||||
|
||||
|
||||
class CrossAttentionInternal(nn.Module):
|
||||
def __init__(self, config, ctx_hidden_size):
|
||||
super().__init__()
|
||||
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
|
||||
raise ValueError(
|
||||
"The hidden size (%d) is not a multiple of the number of attention "
|
||||
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
|
||||
)
|
||||
|
||||
self.num_attention_heads = config.num_attention_heads
|
||||
self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
|
||||
self.all_head_size = self.num_attention_heads * self.attention_head_size
|
||||
|
||||
self.query = nn.Linear(config.hidden_size, self.all_head_size)
|
||||
self.key = nn.Linear(ctx_hidden_size, self.all_head_size)
|
||||
self.value = nn.Linear(ctx_hidden_size, self.all_head_size)
|
||||
|
||||
self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
|
||||
|
||||
def transpose_for_scores(self, x):
|
||||
new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
|
||||
x = x.view(*new_x_shape)
|
||||
return x.permute(0, 2, 1, 3)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
hidden_states,
|
||||
attention_mask=None,
|
||||
head_mask=None,
|
||||
encoder_hidden_states=None,
|
||||
encoder_attention_mask=None,
|
||||
output_attentions=False,
|
||||
):
|
||||
mixed_query_layer = self.query(hidden_states)
|
||||
|
||||
# If this is instantiated as a cross-attention module, the keys
|
||||
# and values come from an encoder; the attention mask needs to be
|
||||
# such that the encoder's padding tokens are not attended to.
|
||||
mixed_key_layer = self.key(encoder_hidden_states)
|
||||
mixed_value_layer = self.value(encoder_hidden_states)
|
||||
attention_mask = encoder_attention_mask
|
||||
|
||||
query_layer = self.transpose_for_scores(mixed_query_layer)
|
||||
key_layer = self.transpose_for_scores(mixed_key_layer)
|
||||
value_layer = self.transpose_for_scores(mixed_value_layer)
|
||||
|
||||
# Take the dot product between "query" and "key" to get the raw attention scores.
|
||||
attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
|
||||
attention_scores = attention_scores / math.sqrt(self.attention_head_size)
|
||||
if attention_mask is not None:
|
||||
# Apply the attention mask is (precomputed for all layers in BertModel forward() function)
|
||||
attention_scores = attention_scores + attention_mask
|
||||
|
||||
# Normalize the attention scores to probabilities.
|
||||
attention_probs = nn.Softmax(dim=-1)(attention_scores)
|
||||
|
||||
# This is actually dropping out entire tokens to attend to, which might
|
||||
# seem a bit unusual, but is taken from the original Transformer paper.
|
||||
attention_probs = self.dropout(attention_probs)
|
||||
|
||||
# Mask heads if we want to
|
||||
if head_mask is not None:
|
||||
attention_probs = attention_probs * head_mask
|
||||
|
||||
context_layer = torch.matmul(attention_probs, value_layer)
|
||||
|
||||
context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
|
||||
new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
|
||||
context_layer = context_layer.view(*new_context_layer_shape)
|
||||
|
||||
outputs = (context_layer, nn.Softmax(dim=-1)(attention_scores)) if output_attentions else (context_layer,)
|
||||
return outputs
|
||||
|
||||
|
||||
class CrossTrmFinetuner(pl.LightningModule):
|
||||
def __init__(self, hparams, bertmodel):
|
||||
super().__init__()
|
||||
self._hparams = hparams
|
||||
|
||||
self.lr = hparams['lr']
|
||||
self.weight_decay = hparams['weight_decay']
|
||||
|
||||
self.kg_dim = 320
|
||||
# self.bert = BertForMaskedLM.from_pretrained(MODEL)
|
||||
self.bert = bertmodel
|
||||
|
||||
if self._hparams['use_hitter']:
|
||||
self.kg_layer_num = 10
|
||||
self.cross_attentions = nn.ModuleList([CrossAttention(self.bert.config, self.kg_dim)
|
||||
for _ in range(self.kg_layer_num)])
|
||||
checkpoint = load_checkpoint('local/best/20200812-174221-trmeh-fb15k237-best/checkpoint_best.pt')
|
||||
self.hitter = KgeModel.create_from(checkpoint)
|
||||
|
||||
def forward(self, batch):
|
||||
sent_input, masked_labels, batch_labels, label_lens, answer_ids, s, p, entity_mask, entity_span_index = batch
|
||||
|
||||
if self._hparams['use_hitter']:
|
||||
# kg_masks: [bs, 1, 1, length]
|
||||
# kg_embeds: nlayer*[bs, length, dim]
|
||||
kg_embeds, kg_masks = self.hitter('get_hitter_repr', s, p)
|
||||
kg_attentions = [None] * 2 + [(self.cross_attentions[i], kg_embeds[(i + 2) // 2], kg_masks)
|
||||
for i in range(self.kg_layer_num)]
|
||||
else:
|
||||
kg_attentions = []
|
||||
|
||||
out = self.bert(kg_attentions=kg_attentions,
|
||||
output_attentions=True,
|
||||
output_hidden_states=True,
|
||||
return_dict=True,
|
||||
labels=masked_labels,
|
||||
**sent_input,
|
||||
)
|
||||
|
||||
return out
|
||||
|
||||
def training_step(self, batch, batch_idx):
|
||||
output = self(batch)
|
||||
loss = output.loss
|
||||
self.log('train_loss', loss, on_epoch=True, prog_bar=True)
|
||||
return {'loss': loss}
|
||||
|
||||
def validation_step(self, batch, batch_idx):
|
||||
batch_inputs, masked_labels, batch_labels, label_lens, answer_ids, s, p, entity_mask, _ = batch
|
||||
output = self(batch)
|
||||
input_tokens = batch_inputs["input_ids"].clone()
|
||||
|
||||
logits = output.logits[masked_labels != -100]
|
||||
probs = logits.softmax(dim=-1)
|
||||
values, predictions = probs.topk(1)
|
||||
hits = []
|
||||
now_pos = 0
|
||||
for sample_i, label_length in enumerate(label_lens.tolist()):
|
||||
failed = False
|
||||
for i in range(label_length):
|
||||
if (predictions[now_pos + i] == batch_labels[sample_i][i]).sum() != 1:
|
||||
failed = True
|
||||
break
|
||||
hits += [1] if not failed else [0]
|
||||
now_pos += label_length
|
||||
hits = torch.tensor(hits)
|
||||
input_tokens[input_tokens == tokenizer.mask_token_id] = predictions.flatten()
|
||||
pred_strings = [str(hits[i].item()) + ' ' + tokenizer.decode(input_tokens[i], skip_special_tokens=True)
|
||||
for i in range(input_tokens.size(0))]
|
||||
|
||||
return {'val_loss': output.loss,
|
||||
'val_acc': hits.float(),
|
||||
'pred_strings': pred_strings}
|
||||
|
||||
def validation_epoch_end(self, outputs):
|
||||
avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
|
||||
avg_val_acc = torch.cat([x['val_acc'] for x in outputs]).mean().to(avg_loss.device)
|
||||
|
||||
if self.global_rank == 0:
|
||||
tensorboard = self.logger.experiment
|
||||
tensorboard.add_text('pred', '\n\n'.join(sum([x['pred_strings'] for x in outputs], [])), self.global_step)
|
||||
|
||||
self.log('avg_loss', avg_loss, on_epoch=True, prog_bar=True, sync_dist=True)
|
||||
self.log('avg_val_acc', avg_val_acc, on_epoch=True, prog_bar=True, sync_dist=True)
|
||||
return {'val_loss': avg_loss}
|
||||
|
||||
def train_dataloader(self):
|
||||
return DataLoader(FBQADataset(self._hparams['train_dataset']),
|
||||
self._hparams['batch_size'],
|
||||
shuffle=True,
|
||||
collate_fn=fbqa_collate,
|
||||
num_workers=0)
|
||||
|
||||
def val_dataloader(self):
|
||||
return DataLoader(FBQADataset(self._hparams['val_dataset']),
|
||||
1,
|
||||
shuffle=False,
|
||||
collate_fn=fbqa_collate,
|
||||
num_workers=0)
|
||||
|
||||
def test_dataloader(self):
|
||||
return DataLoader(FBQADataset(self._hparams['test_dataset']),
|
||||
1,
|
||||
shuffle=False,
|
||||
collate_fn=fbqa_collate,
|
||||
num_workers=0)
|
||||
|
||||
def configure_optimizers(self):
|
||||
no_decay = ['bias', 'LayerNorm.weight']
|
||||
no_fine_tune = ['cross_attentions']
|
||||
pgs = [{'params': [p for n, p in self.named_parameters() if not any(nd in n for nd in no_decay) and not any([i in n for i in no_fine_tune])],
|
||||
'weight_decay': 0.01},
|
||||
{'params': [p for n, p in self.named_parameters() if any(nd in n for nd in no_decay) and not any([i in n for i in no_fine_tune])],
|
||||
'weight_decay': 0.0}]
|
||||
if self._hparams['use_hitter']:
|
||||
pgs.append({'params': self.cross_attentions.parameters(), 'lr': 5e-5, 'weight_decay': 0.01})
|
||||
# bert_optimizer = AdamW(pgs, lr=3e-5, weight_decay=1e-2)
|
||||
bert_optimizer = AdamW(pgs, lr=self.lr, weight_decay=self.weight_decay)
|
||||
bert_scheduler = {
|
||||
'scheduler': get_linear_schedule_with_warmup(bert_optimizer, self._hparams['max_steps'] // 10, self._hparams['max_steps']),
|
||||
'interval': 'step',
|
||||
'monitor': None
|
||||
}
|
||||
return [bert_optimizer], [bert_scheduler]
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--exp_name", default='default', nargs='?', help="Name of the experiment")
|
||||
parser.add_argument('--dataset', choices=['fbqa', 'webqsp'], default='fbqa', help="fbqa or webqsp")
|
||||
parser.add_argument('--filtered', default=False, action='store_true', help="Filtered or not")
|
||||
parser.add_argument('--hitter', default=False, action='store_true', help="Use pretrained HittER or not")
|
||||
parser.add_argument('--relphormer', default=False, action='store_true', help="Use pretrained relphormer or not")
|
||||
parser.add_argument('--seed', default=333, type=int, help='Seed number')
|
||||
parser.add_argument('--lr', default=3e-5, type=float, help='learning rate')
|
||||
parser.add_argument('--weight_decay', default=1e-2, type=float, help='weight decay')
|
||||
args = parser.parse_args()
|
||||
seed_everything(args.seed)
|
||||
|
||||
QA_DATASET = args.dataset
|
||||
if args.filtered and args.relphormer:
|
||||
SUBSET = 'relphormer-filtered'
|
||||
elif not args.filtered and args.relphormer:
|
||||
SUBSET = 'relphormer'
|
||||
elif args.filtered and not args.relphormer:
|
||||
SUBSET = 'fb15k237-filtered'
|
||||
else:
|
||||
SUBSET = 'fb15k237'
|
||||
|
||||
hparams = {
|
||||
'use_hitter': args.hitter,
|
||||
'relphormer': args.relphormer,
|
||||
'lr': args.lr,
|
||||
'weight_decay': args.weight_decay,
|
||||
'batch_size': 16,
|
||||
'max_epochs': 20,
|
||||
'train_dataset': f'data/{QA_DATASET}/{SUBSET}/train.json',
|
||||
'val_dataset': f'data/{QA_DATASET}/{SUBSET}/test.json',
|
||||
'test_dataset': f'data/{QA_DATASET}/{SUBSET}/test.json',
|
||||
}
|
||||
|
||||
if hparams['relphormer']:
|
||||
MODEL = "./local/relphormer/"
|
||||
config = AutoConfig.from_pretrained(MODEL)
|
||||
bertmodel = BertForMaskedLM.from_pretrained(MODEL, config=config)
|
||||
model = CrossTrmFinetuner(hparams, bertmodel=bertmodel)
|
||||
else:
|
||||
bertmodel = BertForMaskedLM.from_pretrained(MODEL)
|
||||
model = CrossTrmFinetuner(hparams, bertmodel=bertmodel)
|
||||
model.hparams['max_steps'] = (len(model.train_dataloader().dataset) // hparams['batch_size'] + 1) * hparams['max_epochs']
|
||||
base_path = '/tmp/hitbert-paper'
|
||||
logger = TensorBoardLogger(base_path, args.exp_name)
|
||||
checkpoint_callback = ModelCheckpoint(
|
||||
monitor='avg_val_acc',
|
||||
dirpath=base_path + '/' + args.exp_name,
|
||||
filename='{epoch:02d}-{avg_val_acc:.3f}',
|
||||
save_top_k=1,
|
||||
mode='max')
|
||||
trainer = pl.Trainer(gpus=1, accelerator="ddp",
|
||||
max_epochs=hparams['max_epochs'], max_steps=model.hparams['max_steps'],
|
||||
checkpoint_callback=True,
|
||||
gradient_clip_val=1.0, logger=logger,
|
||||
callbacks=[LearningRateMonitor(), checkpoint_callback])
|
||||
trainer.fit(model)
|
||||
print("QA Task End!")
|
@@ -1,8 +0,0 @@
# from transformers.models.bert.modeling_bert import BertForMaskedLM
from models.huggingface_relformer import BertForMaskedLM
class BertKGC(BertForMaskedLM):

    @staticmethod
    def add_to_argparse(parser):
        parser.add_argument("--pretrain", type=int, default=0, help="")
        return parser
@@ -1,10 +0,0 @@
for SEED in 111 222 333 444 555 666 777 888 999
do
    # echo ${LR} ${WD}
    python hitter-bert.py --dataset fbqa \
        --relphormer \
        --seed ${SEED} \
        --exp_name relphormer-fbqa \
        --lr 3e-5 \
        --weight_decay 1e-2
done
@@ -1,13 +0,0 @@

for SEED in 111 222 333 444 555 666 777 888 999
do

    # echo ${LR} ${WD}
    python hitter-bert.py --dataset fbqa \
        --relphormer \
        --filtered \
        --seed ${SEED} \
        --exp_name relphormer-filtered-fbqa \
        --lr 3e-5 \
        --weight_decay 1e-2
done
@@ -1,10 +0,0 @@

for SEED in 222 333 444 555 666 777 888 999
do
    python hitter-bert.py --dataset webqsp \
        --relphormer \
        --seed ${SEED} \
        --exp_name relphormer-webqsp \
        --lr 3e-5 \
        --weight_decay 1e-2
done
@@ -1,12 +0,0 @@

for SEED in 111 222 333 444 555 666 777 888 999
do
    # echo ${LR} ${WD}
    python hitter-bert.py --dataset webqsp \
        --relphormer \
        --filtered \
        --seed ${SEED} \
        --exp_name relphormer-filtered-webqsp \
        --lr 3e-5 \
        --weight_decay 1e-2
done
QA/utils.py (1159 lines)
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,6 +0,0 @@
{
    "#examples": 1639,
    "#kept_examples": 484,
    "#mappable_examples": 484,
    "#multiple_answer_examples": 800
}

File diff suppressed because it is too large
@@ -1,6 +0,0 @@
{
    "#examples": 3098,
    "#kept_examples": 850,
    "#mappable_examples": 850,
    "#multiple_answer_examples": 1437
}

File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,6 +0,0 @@
{
    "#examples": 1639,
    "#kept_examples": 1582,
    "#mappable_examples": 484,
    "#multiple_answer_examples": 800
}

File diff suppressed because it is too large
@@ -1,6 +0,0 @@
{
    "#examples": 3098,
    "#kept_examples": 2997,
    "#mappable_examples": 850,
    "#multiple_answer_examples": 1437
}

File diff suppressed because it is too large
README.md (115 lines)
@@ -1,115 +0,0 @@
# Relphormer

Code for the paper: "Relphormer: Relational Graph Transformer for Knowledge Graph Representations".

> Transformers have achieved remarkable performance in widespread fields, including natural language processing, computer vision and graph mining. However, vanilla Transformer architectures have not yielded promising improvements in Knowledge Graph (KG) representations, where the translational distance paradigm dominates this area. Note that vanilla Transformer architectures struggle to capture the intrinsically heterogeneous semantic and structural information of knowledge graphs. To this end, we propose a new variant of Transformer for knowledge graph representations dubbed Relphormer. Specifically, we introduce Triple2Seq, which can dynamically sample contextualized sub-graph sequences as the input to alleviate the heterogeneity issue. We propose a novel structure-enhanced self-attention mechanism to encode the relational information and keep the globally semantic information among sub-graphs. Moreover, we propose masked knowledge modeling as a new paradigm for knowledge graph representation learning. We apply Relphormer to three tasks, namely knowledge graph completion, KG-based question answering and KG-based recommendation, for evaluation. Experimental results show that Relphormer can obtain better performance on benchmark datasets compared with baselines.

# Model Architecture

<div align=center>
<img src="./resource/model.png" width="85%" height="75%" />
</div>

The model architecture of Relphormer.
The contextualized sub-graph is sampled with Triple2Seq, and then it is converted into sequences while maintaining its sub-graph structure.
Next, we conduct masked knowledge modeling, which randomly masks the nodes of the center triple in the contextualized sub-graph sequences.
For the Transformer architecture, we design a novel structure-enhanced mechanism to preserve the structural features.
Finally, we use the pre-trained KG Transformer for KG-based downstream tasks.

# Environments

- python (3.8.13)
- cuda (11.2)
- Ubuntu-18.04.6 (4.15.0-156-generic)

# Requirements

To run the code, you need to install the requirements:
```
pip install -r requirements.txt
```

The expected structure of files is:

```
── Relphormer
   ├── data
   ├── dataset
   │   ├── FB15k-237
   │   ├── WN18RR
   │   ├── umls
   │   ├── create_neighbor.py
   ├── lit_models
   │   ├── __init__.py
   │   ├── base.py
   │   ├── transformer.py
   │   └── utils.py
   ├── models
   │   ├── __init__.py
   │   ├── huggingface_relformer.py
   │   ├── model.py
   │   └── utils.py
   ├── resource
   │   └── model.png
   ├── scripts
   │   ├── fb15k-237
   │   ├── wn18rr
   │   └── umls
   ├── QA
   ├── logs
   ├── main.py
   └── requirements.txt
```

# How to run

## KGC Task

### Generate Masked Neighbors

- Use the command below to generate the masked neighbors.
```shell
>> cd dataset
>> python create_neighbor.py --dataset xxx  # e.g. python create_neighbor.py --dataset umls
```

### Entity Embedding Initialization

- Then use the command below to add entities to BERT and initialize the entity embedding layer used in later training. For the other datasets, `FB15k-237` and `WN18RR`, just replace the dataset name with `fb15k-237` or `wn18rr`.

```shell
>> cd pretrain
>> mkdir logs
>> bash scripts/pretrain_umls.sh
>> tail -f -n 2000 logs/pretrain_umls.log
```

The pretrained models are saved in the `Relphormer/pretrain/output` directory.

### Entity Prediction

- Next, use the command below to train the model to predict the correct entity in the masked position. Same as above for the other datasets.

```shell
>> cd Relphormer
>> mkdir logs
>> bash scripts/umls/umls.sh
>> tail -f -n 2000 logs/train_umls.log
```

The trained models are saved in the `Relphormer/output` directory.

## QA Task

The experimental settings for QA follow the [HittER](https://arxiv.org/pdf/2008.12813.pdf) experimental settings, and the environment can be set up by referring to the [HittER GitHub repository](https://github.com/microsoft/HittER). We only modified **hitter-bert.py** to fit our model.

- The Relphormer model used for QA can be downloaded [here](https://drive.google.com/file/d/1FK_A_kFq1ECoNm75RfkcvYv8rZiJL1Bw/view?usp=sharing).

```shell
>> cd QA
>> sh scripts/relphormer_fbqa.sh
>> sh scripts/relphormer_fbqa_filtered.sh
>> sh scripts/relphormer_webqsp.sh
>> sh scripts/relphormer_webqsp_filtered.sh
```
config/log_config.json (new file, 24 lines)
@@ -0,0 +1,24 @@
{
    "version": 1,
    "disable_existing_loggers": false,
    "formatters": {
        "simple": {
            "format": "%(asctime)s - %(name)s - [%(levelname)s] - %(message)s"
        }
    },
    "handlers": {
        "file_handler": {
            "class": "logging.FileHandler",
            "level": "DEBUG",
            "formatter": "simple",
            "filename": "python_logging.log",
            "encoding": "utf8"
        }
    },
    "root": {
        "level": "DEBUG",
        "handlers": [
            "file_handler"
        ]
    }
}
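The added `config/log_config.json` follows the standard `logging.config.dictConfig` schema: a `simple` formatter, a `FileHandler` writing DEBUG-level records to `python_logging.log`, and a DEBUG root logger. The code that consumes it is not visible in this compare view; the snippet below is only a minimal sketch, assuming the file is loaded with the standard library.

```python
# Minimal sketch (not part of this changeset): load config/log_config.json
# with logging.config.dictConfig; the path matches the file added above,
# everything else is illustrative.
import json
import logging
import logging.config

with open("config/log_config.json", "r", encoding="utf-8") as f:
    logging.config.dictConfig(json.load(f))

logger = logging.getLogger(__name__)
logger.debug("logging initialised from config/log_config.json")
```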
@@ -1,2 +0,0 @@
from .data_module import KGC
from .processor import convert_examples_to_features, KGProcessor
@@ -1,63 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import cython
from cython.parallel cimport prange, parallel
cimport numpy
import numpy

def floyd_warshall(adjacency_matrix):

    (nrows, ncols) = adjacency_matrix.shape
    assert nrows == ncols
    cdef unsigned int n = nrows

    adj_mat_copy = adjacency_matrix.astype(long, order='C', casting='safe', copy=True)
    assert adj_mat_copy.flags['C_CONTIGUOUS']
    cdef numpy.ndarray[long, ndim=2, mode='c'] M = adj_mat_copy
    cdef numpy.ndarray[long, ndim=2, mode='c'] path = numpy.zeros([n, n], dtype=numpy.int64)

    cdef unsigned int i, j, k
    cdef long M_ij, M_ik, cost_ikkj
    cdef long* M_ptr = &M[0,0]
    cdef long* M_i_ptr
    cdef long* M_k_ptr

    # set unreachable nodes distance to 510
    for i in range(n):
        for j in range(n):
            if i == j:
                M[i][j] = 0
            elif M[i][j] == 0:
                M[i][j] = 510

    # Floyd-Warshall main loop
    for k in range(n):
        M_k_ptr = M_ptr + n*k
        for i in range(n):
            M_i_ptr = M_ptr + n*i
            M_ik = M_i_ptr[k]
            for j in range(n):
                cost_ikkj = M_ik + M_k_ptr[j]
                M_ij = M_i_ptr[j]
                if M_ij > cost_ikkj:
                    M_i_ptr[j] = cost_ikkj
                    path[i][j] = k

    # set unreachable path to 510
    for i in range(n):
        for j in range(n):
            if M[i][j] >= 510:
                path[i][j] = 510
                M[i][j] = 510

    return M, path


def get_all_edges(path, i, j):
    cdef unsigned int k = path[i][j]
    if k == 0:
        return []
    else:
        return get_all_edges(path, i, k) + [k] + get_all_edges(path, k, j)
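The deleted `data/processor.py` further below compiles this Cython module on the fly with `pyximport` and imports it as `data.algos`. A minimal usage sketch on a made-up three-node graph (the toy adjacency matrix is illustrative, not from the repository):

```python
# Hypothetical usage sketch of data/algos.pyx; the pyximport call mirrors the
# one in the deleted data/processor.py, the toy graph is made up.
import numpy as np
import pyximport

pyximport.install(setup_args={'include_dirs': np.get_include()})
import data.algos as algos

# 3-node chain: 0-1 and 1-2 are edges, 0-2 is derived by the algorithm.
adj = np.array([[0, 1, 0],
                [1, 0, 1],
                [0, 1, 0]], dtype=np.int64)

shortest, path = algos.floyd_warshall(adj)
print(shortest)                          # pairwise shortest-path lengths (510 = unreachable)
print(algos.get_all_edges(path, 0, 2))   # intermediate nodes on the 0 -> 2 path, here [1]
```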
@@ -1,71 +0,0 @@
"""Base DataModule class."""
from pathlib import Path
from typing import Dict
import argparse
import os

import pytorch_lightning as pl
from torch.utils.data import DataLoader


class Config(dict):
    def __getattr__(self, name):
        return self.get(name)

    def __setattr__(self, name, val):
        self[name] = val


BATCH_SIZE = 8
NUM_WORKERS = 8


class BaseDataModule(pl.LightningDataModule):
    """
    Base DataModule.
    Learn more at https://pytorch-lightning.readthedocs.io/en/stable/datamodules.html
    """

    def __init__(self, args: argparse.Namespace = None) -> None:
        super().__init__()
        self.args = Config(vars(args)) if args is not None else {}
        self.batch_size = self.args.get("batch_size", BATCH_SIZE)
        self.num_workers = self.args.get("num_workers", NUM_WORKERS)

    @staticmethod
    def add_to_argparse(parser):
        parser.add_argument(
            "--batch_size", type=int, default=BATCH_SIZE, help="Number of examples to operate on per forward step."
        )
        parser.add_argument(
            "--num_workers", type=int, default=0, help="Number of additional processes to load data."
        )
        parser.add_argument(
            "--dataset", type=str, default="./dataset/NELL", help="Path to the dataset directory."
        )
        return parser

    def prepare_data(self):
        """
        Use this method to do things that might write to disk or that need to be done only from a single GPU in distributed settings (so don't set state `self.x = y`).
        """
        pass

    def setup(self, stage=None):
        """
        Split into train, val, test, and set dims.
        Should assign `torch Dataset` objects to self.data_train, self.data_val, and optionally self.data_test.
        """
        self.data_train = None
        self.data_val = None
        self.data_test = None

    def train_dataloader(self):
        return DataLoader(self.data_train, shuffle=True, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True)

    def val_dataloader(self):
        return DataLoader(self.data_val, shuffle=False, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True)

    def test_dataloader(self):
        return DataLoader(self.data_test, shuffle=False, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True)
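For reference, a hypothetical toy subclass showing how `BaseDataModule` above is meant to be extended; the real subclass in this changeset is the `KGC` data module in the `data_module.py` diff that follows. Everything in the sketch except the `BaseDataModule` API is made up.

```python
# Hypothetical toy subclass; assumes BaseDataModule (defined above) is in scope.
import argparse
import torch
from torch.utils.data import TensorDataset


class ToyDataModule(BaseDataModule):
    def setup(self, stage=None):
        # assign torch Datasets; BaseDataModule's dataloaders pick them up
        x = torch.randn(100, 4)
        y = torch.randint(0, 2, (100,))
        self.data_train = TensorDataset(x[:80], y[:80])
        self.data_val = TensorDataset(x[80:90], y[80:90])
        self.data_test = TensorDataset(x[90:], y[90:])


parser = argparse.ArgumentParser()
ToyDataModule.add_to_argparse(parser)
args = parser.parse_args(["--batch_size", "16", "--num_workers", "0"])
dm = ToyDataModule(args)
dm.setup()
print(next(iter(dm.train_dataloader()))[0].shape)  # torch.Size([16, 4])
```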
@@ -1,195 +0,0 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, List, NewType, Optional, Tuple, Union
|
||||
from enum import Enum
|
||||
import torch
|
||||
|
||||
from torch.utils.data import DataLoader
|
||||
from transformers import AutoTokenizer, BertTokenizer
|
||||
# from transformers.configuration_bert import BertTokenizer, BertTokenizerFast
|
||||
from transformers.tokenization_utils_base import (BatchEncoding,
|
||||
PreTrainedTokenizerBase)
|
||||
|
||||
from .base_data_module import BaseDataModule
|
||||
from .processor import KGProcessor, get_dataset
|
||||
import transformers
|
||||
transformers.logging.set_verbosity_error()
|
||||
|
||||
class ExplicitEnum(Enum):
|
||||
"""
|
||||
Enum with more explicit error message for missing values.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def _missing_(cls, value):
|
||||
raise ValueError(
|
||||
f"{value} is not a valid {cls.__name__}, please select one of {list(cls._value2member_map_.keys())}"
|
||||
)
|
||||
|
||||
class PaddingStrategy(ExplicitEnum):
|
||||
"""
|
||||
Possible values for the ``padding`` argument in :meth:`PreTrainedTokenizerBase.__call__`. Useful for tab-completion
|
||||
in an IDE.
|
||||
"""
|
||||
|
||||
LONGEST = "longest"
|
||||
MAX_LENGTH = "max_length"
|
||||
DO_NOT_PAD = "do_not_pad"
|
||||
|
||||
import numpy as np
|
||||
|
||||
@dataclass
|
||||
class DataCollatorForSeq2Seq:
|
||||
"""
|
||||
Data collator that will dynamically pad the inputs received, as well as the labels.
|
||||
|
||||
Args:
|
||||
tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`):
|
||||
The tokenizer used for encoding the data.
|
||||
model (:class:`~transformers.PreTrainedModel`):
|
||||
The model that is being trained. If set and has the `prepare_decoder_input_ids_from_labels`, use it to
|
||||
prepare the `decoder_input_ids`
|
||||
|
||||
This is useful when using `label_smoothing` to avoid calculating loss twice.
|
||||
padding (:obj:`bool`, :obj:`str` or :class:`~transformers.file_utils.PaddingStrategy`, `optional`, defaults to :obj:`True`):
|
||||
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
|
||||
among:
|
||||
|
||||
* :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
|
||||
sequence is provided).
|
||||
* :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
|
||||
maximum acceptable input length for the model if that argument is not provided.
|
||||
* :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
|
||||
different lengths).
|
||||
max_length (:obj:`int`, `optional`):
|
||||
Maximum length of the returned list and optionally padding length (see above).
|
||||
pad_to_multiple_of (:obj:`int`, `optional`):
|
||||
If set will pad the sequence to a multiple of the provided value.
|
||||
|
||||
This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
|
||||
7.5 (Volta).
|
||||
label_pad_token_id (:obj:`int`, `optional`, defaults to -100):
|
||||
The id to use when padding the labels (-100 will be automatically ignored by PyTorch loss functions).
|
||||
"""
|
||||
|
||||
tokenizer: PreTrainedTokenizerBase
|
||||
model: Optional[Any] = None
|
||||
padding: Union[bool, str, PaddingStrategy] = True
|
||||
max_length: Optional[int] = None
|
||||
pad_to_multiple_of: Optional[int] = None
|
||||
label_pad_token_id: int = -100
|
||||
return_tensors: str = "pt"
|
||||
num_labels: int = 0
|
||||
|
||||
def __call__(self, features, return_tensors=None):
|
||||
|
||||
if return_tensors is None:
|
||||
return_tensors = self.return_tensors
|
||||
labels = [feature.pop("labels") for feature in features] if "labels" in features[0].keys() else None
|
||||
label = [feature.pop("label") for feature in features]
|
||||
features_keys = {}
|
||||
name_keys = list(features[0].keys())
|
||||
for k in name_keys:
|
||||
# ignore the padding arguments
|
||||
if k in ["input_ids", "attention_mask", "token_type_ids"]: continue
|
||||
try:
|
||||
features_keys[k] = [feature.pop(k) for feature in features]
|
||||
except KeyError:
|
||||
continue
|
||||
|
||||
# We have to pad the labels before calling `tokenizer.pad` as this method won't pad them and needs them of the
|
||||
# same length to return tensors.
|
||||
bsz = len(labels)
|
||||
with torch.no_grad():
|
||||
new_labels = torch.zeros(bsz, self.num_labels)
|
||||
for i,l in enumerate(labels):
|
||||
if isinstance(l, int):
|
||||
new_labels[i][l] = 1
|
||||
else:
|
||||
for j in l:
|
||||
new_labels[i][j] = 1
|
||||
labels = new_labels
|
||||
|
||||
features = self.tokenizer.pad(
|
||||
features,
|
||||
padding=self.padding,
|
||||
max_length=self.max_length,
|
||||
pad_to_multiple_of=self.pad_to_multiple_of,
|
||||
return_tensors=return_tensors,
|
||||
)
|
||||
features['labels'] = labels
|
||||
features['label'] = torch.tensor(label)
|
||||
features.update(features_keys)
|
||||
|
||||
return features
|
||||
|
||||
|
||||
|
||||
class KGC(BaseDataModule):
|
||||
def __init__(self, args, model) -> None:
|
||||
super().__init__(args)
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(self.args.model_name_or_path, use_fast=False)
|
||||
self.processor = KGProcessor(self.tokenizer, args)
|
||||
self.label_list = self.processor.get_labels(args.data_dir)
|
||||
|
||||
entity_list = self.processor.get_entities(args.data_dir)
|
||||
|
||||
num_added_tokens = self.tokenizer.add_special_tokens({'additional_special_tokens': entity_list})
|
||||
self.sampler = DataCollatorForSeq2Seq(self.tokenizer,
|
||||
model=model,
|
||||
label_pad_token_id=self.tokenizer.pad_token_id,
|
||||
pad_to_multiple_of=8 if self.args.precision == 16 else None,
|
||||
padding="longest",
|
||||
max_length=self.args.max_seq_length,
|
||||
num_labels = len(entity_list),
|
||||
)
|
||||
relations_tokens = self.processor.get_relations(args.data_dir)
|
||||
self.num_relations = len(relations_tokens)
|
||||
num_added_tokens = self.tokenizer.add_special_tokens({'additional_special_tokens': relations_tokens})
|
||||
|
||||
vocab = self.tokenizer.get_added_vocab()
|
||||
self.relation_id_st = vocab[relations_tokens[0]]
|
||||
self.relation_id_ed = vocab[relations_tokens[-1]] + 1
|
||||
self.entity_id_st = vocab[entity_list[0]]
|
||||
self.entity_id_ed = vocab[entity_list[-1]] + 1
|
||||
|
||||
|
||||
def setup(self, stage=None):
|
||||
self.data_train = get_dataset(self.args, self.processor, self.label_list, self.tokenizer, "train")
|
||||
self.data_val = get_dataset(self.args, self.processor, self.label_list, self.tokenizer, "dev")
|
||||
self.data_test = get_dataset(self.args, self.processor, self.label_list, self.tokenizer, "test")
|
||||
|
||||
def prepare_data(self):
|
||||
pass
|
||||
|
||||
def get_config(self):
|
||||
d = {}
|
||||
for k, v in self.__dict__.items():
|
||||
if "st" in k or "ed" in k:
|
||||
d.update({k:v})
|
||||
|
||||
return d
|
||||
|
||||
|
||||
@staticmethod
|
||||
def add_to_argparse(parser):
|
||||
BaseDataModule.add_to_argparse(parser)
|
||||
parser.add_argument("--model_name_or_path", type=str, default="roberta-base", help="the name or the path to the pretrained model")
|
||||
parser.add_argument("--data_dir", type=str, default="roberta-base", help="the name or the path to the pretrained model")
|
||||
parser.add_argument("--max_seq_length", type=int, default=256, help="Number of examples to operate on per forward step.")
|
||||
parser.add_argument("--warm_up_radio", type=float, default=0.1, help="Number of examples to operate on per forward step.")
|
||||
parser.add_argument("--eval_batch_size", type=int, default=8)
|
||||
parser.add_argument("--overwrite_cache", action="store_true", default=False)
|
||||
return parser
|
||||
|
||||
def get_tokenizer(self):
|
||||
return self.tokenizer
|
||||
|
||||
def train_dataloader(self):
|
||||
return DataLoader(self.data_train, num_workers=self.num_workers, pin_memory=True, collate_fn=self.sampler, batch_size=self.args.batch_size, shuffle=not self.args.faiss_init)
|
||||
|
||||
def val_dataloader(self):
|
||||
return DataLoader(self.data_val, num_workers=self.num_workers, pin_memory=True, collate_fn=self.sampler, batch_size=self.args.eval_batch_size)
|
||||
|
||||
def test_dataloader(self):
|
||||
return DataLoader(self.data_test, num_workers=self.num_workers, pin_memory=True, collate_fn=self.sampler, batch_size=self.args.eval_batch_size)
|
||||
|
data/icews14/about.txt (new file, 15 lines)
@@ -0,0 +1,15 @@
# triples: 89320
# entities: 7128
# relations: 12409
# timesteps: 208
# test triples: 8255
# valid triples: 8239
# train triples: 72826
Measure method: N/A
Target Size : 0
Grow Factor: 0
Shrink Factor: 0
Epsilon Factor: 0
Search method: N/A
filter_dupes: inter
nonames: False

data/icews14/entities.dict (new file, 7128 lines)
File diff suppressed because it is too large

data/icews14/relations.dict (new file, 12409 lines)
File diff suppressed because it is too large

data/icews14/test.txt (new file, 8255 lines)
File diff suppressed because it is too large
data/icews14/time_map.dict (new file, 209 lines)
@@ -0,0 +1,209 @@
|
||||
0 0 2
|
||||
1 3 5
|
||||
2 6 7
|
||||
3 8 9
|
||||
4 10 12
|
||||
5 13 14
|
||||
6 15 16
|
||||
7 17 19
|
||||
8 20 21
|
||||
9 22 23
|
||||
10 24 26
|
||||
11 27 28
|
||||
12 29 30
|
||||
13 31 33
|
||||
14 34 35
|
||||
15 36 37
|
||||
16 38 40
|
||||
17 41 42
|
||||
18 43 44
|
||||
19 45 46
|
||||
20 47 48
|
||||
21 49 49
|
||||
22 50 50
|
||||
23 51 51
|
||||
24 52 53
|
||||
25 54 54
|
||||
26 55 55
|
||||
27 56 57
|
||||
28 58 59
|
||||
29 60 61
|
||||
30 62 62
|
||||
31 63 63
|
||||
32 64 65
|
||||
33 66 68
|
||||
34 69 70
|
||||
35 71 71
|
||||
36 72 72
|
||||
37 73 74
|
||||
38 75 76
|
||||
39 77 78
|
||||
40 79 80
|
||||
41 81 82
|
||||
42 83 84
|
||||
43 85 85
|
||||
44 86 87
|
||||
45 88 89
|
||||
46 90 91
|
||||
47 92 93
|
||||
48 94 96
|
||||
49 97 97
|
||||
50 98 99
|
||||
51 100 101
|
||||
52 102 103
|
||||
53 104 105
|
||||
54 106 107
|
||||
55 108 110
|
||||
56 111 112
|
||||
57 113 114
|
||||
58 115 116
|
||||
59 117 118
|
||||
60 119 119
|
||||
61 120 121
|
||||
62 122 124
|
||||
63 125 125
|
||||
64 126 127
|
||||
65 128 129
|
||||
66 130 131
|
||||
67 132 133
|
||||
68 134 135
|
||||
69 136 138
|
||||
70 139 139
|
||||
71 140 140
|
||||
72 141 141
|
||||
73 142 143
|
||||
74 144 145
|
||||
75 146 147
|
||||
76 148 148
|
||||
77 149 150
|
||||
78 151 152
|
||||
79 153 154
|
||||
80 155 155
|
||||
81 156 157
|
||||
82 158 159
|
||||
83 160 161
|
||||
84 162 163
|
||||
85 164 166
|
||||
86 167 167
|
||||
87 168 168
|
||||
88 169 169
|
||||
89 170 170
|
||||
90 171 173
|
||||
91 174 175
|
||||
92 176 177
|
||||
93 178 180
|
||||
94 181 182
|
||||
95 183 183
|
||||
96 184 185
|
||||
97 186 187
|
||||
98 188 188
|
||||
99 189 190
|
||||
100 191 192
|
||||
101 193 194
|
||||
102 195 195
|
||||
103 196 197
|
||||
104 198 199
|
||||
105 200 201
|
||||
106 202 203
|
||||
107 204 205
|
||||
108 206 208
|
||||
109 209 210
|
||||
110 211 212
|
||||
111 213 215
|
||||
112 216 217
|
||||
113 218 219
|
||||
114 220 221
|
||||
115 222 222
|
||||
116 223 224
|
||||
117 225 226
|
||||
118 227 229
|
||||
119 230 231
|
||||
120 232 233
|
||||
121 234 236
|
||||
122 237 238
|
||||
123 239 239
|
||||
124 240 241
|
||||
125 242 243
|
||||
126 244 245
|
||||
127 246 246
|
||||
128 247 248
|
||||
129 249 250
|
||||
130 251 251
|
||||
131 252 252
|
||||
132 253 253
|
||||
133 254 254
|
||||
134 255 256
|
||||
135 257 257
|
||||
136 258 259
|
||||
137 260 261
|
||||
138 262 263
|
||||
139 264 264
|
||||
140 265 265
|
||||
141 266 266
|
||||
142 267 267
|
||||
143 268 269
|
||||
144 270 271
|
||||
145 272 272
|
||||
146 273 273
|
||||
147 274 274
|
||||
148 275 276
|
||||
149 277 278
|
||||
150 279 279
|
||||
151 280 281
|
||||
152 282 283
|
||||
153 284 285
|
||||
154 286 286
|
||||
155 287 287
|
||||
156 288 288
|
||||
157 289 289
|
||||
158 290 291
|
||||
159 292 292
|
||||
160 293 293
|
||||
161 294 294
|
||||
162 295 295
|
||||
163 296 297
|
||||
164 298 299
|
||||
165 300 300
|
||||
166 301 301
|
||||
167 302 303
|
||||
168 304 305
|
||||
169 306 307
|
||||
170 308 309
|
||||
171 310 310
|
||||
172 311 312
|
||||
173 313 313
|
||||
174 314 314
|
||||
175 315 315
|
||||
176 316 316
|
||||
177 317 317
|
||||
178 318 319
|
||||
179 320 320
|
||||
180 321 321
|
||||
181 322 322
|
||||
182 323 323
|
||||
183 324 324
|
||||
184 325 326
|
||||
185 327 327
|
||||
186 328 328
|
||||
187 329 329
|
||||
188 330 330
|
||||
189 331 332
|
||||
190 333 334
|
||||
191 335 335
|
||||
192 336 336
|
||||
193 337 338
|
||||
194 339 340
|
||||
195 341 342
|
||||
196 343 343
|
||||
197 344 344
|
||||
198 345 346
|
||||
199 347 348
|
||||
200 349 349
|
||||
201 350 350
|
||||
202 351 352
|
||||
203 353 355
|
||||
204 356 357
|
||||
205 358 359
|
||||
206 360 362
|
||||
207 363 365
|
||||
208 366 366
|
data/icews14/train.txt (new file, 72826 lines)
File diff suppressed because it is too large

data/icews14/valid.txt (new file, 8239 lines)
File diff suppressed because it is too large
data/icews14_both/about.txt (new file, 15 lines)
@@ -0,0 +1,15 @@
# triples: 86517
# entities: 7128
# relations: 12409
# timesteps: 208
# test triples: 8218
# valid triples: 8193
# train triples: 70106
Measure method: N/A
Target Size : 0
Grow Factor: 0
Shrink Factor: 0
Epsilon Factor: 0
Search method: N/A
filter_dupes: both
nonames: False

data/icews14_both/entities.dict (new file, 7128 lines)
File diff suppressed because it is too large

data/icews14_both/relations.dict (new file, 12409 lines)
File diff suppressed because it is too large

data/icews14_both/test.txt (new file, 8218 lines)
File diff suppressed because it is too large
data/icews14_both/time_map.dict (new file, 209 lines)
@@ -0,0 +1,209 @@
|
||||
0 0 2
|
||||
1 3 5
|
||||
2 6 7
|
||||
3 8 9
|
||||
4 10 12
|
||||
5 13 14
|
||||
6 15 16
|
||||
7 17 19
|
||||
8 20 21
|
||||
9 22 23
|
||||
10 24 26
|
||||
11 27 28
|
||||
12 29 30
|
||||
13 31 33
|
||||
14 34 35
|
||||
15 36 37
|
||||
16 38 40
|
||||
17 41 42
|
||||
18 43 44
|
||||
19 45 46
|
||||
20 47 48
|
||||
21 49 49
|
||||
22 50 50
|
||||
23 51 51
|
||||
24 52 53
|
||||
25 54 54
|
||||
26 55 55
|
||||
27 56 57
|
||||
28 58 59
|
||||
29 60 61
|
||||
30 62 62
|
||||
31 63 63
|
||||
32 64 65
|
||||
33 66 68
|
||||
34 69 70
|
||||
35 71 71
|
||||
36 72 72
|
||||
37 73 74
|
||||
38 75 76
|
||||
39 77 78
|
||||
40 79 80
|
||||
41 81 82
|
||||
42 83 84
|
||||
43 85 85
|
||||
44 86 87
|
||||
45 88 89
|
||||
46 90 91
|
||||
47 92 93
|
||||
48 94 96
|
||||
49 97 97
|
||||
50 98 99
|
||||
51 100 101
|
||||
52 102 103
|
||||
53 104 105
|
||||
54 106 107
|
||||
55 108 110
|
||||
56 111 112
|
||||
57 113 114
|
||||
58 115 116
|
||||
59 117 118
|
||||
60 119 119
|
||||
61 120 121
|
||||
62 122 124
|
||||
63 125 125
|
||||
64 126 127
|
||||
65 128 129
|
||||
66 130 131
|
||||
67 132 133
|
||||
68 134 135
|
||||
69 136 138
|
||||
70 139 139
|
||||
71 140 140
|
||||
72 141 141
|
||||
73 142 143
|
||||
74 144 145
|
||||
75 146 147
|
||||
76 148 148
|
||||
77 149 150
|
||||
78 151 152
|
||||
79 153 154
|
||||
80 155 155
|
||||
81 156 157
|
||||
82 158 159
|
||||
83 160 161
|
||||
84 162 163
|
||||
85 164 166
|
||||
86 167 167
|
||||
87 168 168
|
||||
88 169 169
|
||||
89 170 170
|
||||
90 171 173
|
||||
91 174 175
|
||||
92 176 177
|
||||
93 178 180
|
||||
94 181 182
|
||||
95 183 183
|
||||
96 184 185
|
||||
97 186 187
|
||||
98 188 188
|
||||
99 189 190
|
||||
100 191 192
|
||||
101 193 194
|
||||
102 195 195
|
||||
103 196 197
|
||||
104 198 199
|
||||
105 200 201
|
||||
106 202 203
|
||||
107 204 205
|
||||
108 206 208
|
||||
109 209 210
|
||||
110 211 212
|
||||
111 213 215
|
||||
112 216 217
|
||||
113 218 219
|
||||
114 220 221
|
||||
115 222 222
|
||||
116 223 224
|
||||
117 225 226
|
||||
118 227 229
|
||||
119 230 231
|
||||
120 232 233
|
||||
121 234 236
|
||||
122 237 238
|
||||
123 239 239
|
||||
124 240 241
|
||||
125 242 243
|
||||
126 244 245
|
||||
127 246 246
|
||||
128 247 248
|
||||
129 249 250
|
||||
130 251 251
|
||||
131 252 252
|
||||
132 253 253
|
||||
133 254 254
|
||||
134 255 256
|
||||
135 257 257
|
||||
136 258 259
|
||||
137 260 261
|
||||
138 262 263
|
||||
139 264 264
|
||||
140 265 265
|
||||
141 266 266
|
||||
142 267 267
|
||||
143 268 269
|
||||
144 270 271
|
||||
145 272 272
|
||||
146 273 273
|
||||
147 274 274
|
||||
148 275 276
|
||||
149 277 278
|
||||
150 279 279
|
||||
151 280 281
|
||||
152 282 283
|
||||
153 284 285
|
||||
154 286 286
|
||||
155 287 287
|
||||
156 288 288
|
||||
157 289 289
|
||||
158 290 291
|
||||
159 292 292
|
||||
160 293 293
|
||||
161 294 294
|
||||
162 295 295
|
||||
163 296 297
|
||||
164 298 299
|
||||
165 300 300
|
||||
166 301 301
|
||||
167 302 303
|
||||
168 304 305
|
||||
169 306 307
|
||||
170 308 309
|
||||
171 310 310
|
||||
172 311 312
|
||||
173 313 313
|
||||
174 314 314
|
||||
175 315 315
|
||||
176 316 316
|
||||
177 317 317
|
||||
178 318 319
|
||||
179 320 320
|
||||
180 321 321
|
||||
181 322 322
|
||||
182 323 323
|
||||
183 324 324
|
||||
184 325 326
|
||||
185 327 327
|
||||
186 328 328
|
||||
187 329 329
|
||||
188 330 330
|
||||
189 331 332
|
||||
190 333 334
|
||||
191 335 335
|
||||
192 336 336
|
||||
193 337 338
|
||||
194 339 340
|
||||
195 341 342
|
||||
196 343 343
|
||||
197 344 344
|
||||
198 345 346
|
||||
199 347 348
|
||||
200 349 349
|
||||
201 350 350
|
||||
202 351 352
|
||||
203 353 355
|
||||
204 356 357
|
||||
205 358 359
|
||||
206 360 362
|
||||
207 363 365
|
||||
208 366 366
|
data/icews14_both/train.txt (new file, 70106 lines)
File diff suppressed because it is too large

data/icews14_both/valid.txt (new file, 8193 lines)
File diff suppressed because it is too large

data/kinship/test.txt (new file, 1074 lines)
File diff suppressed because it is too large

data/kinship/train.txt (new file, 8544 lines)
File diff suppressed because it is too large

data/kinship/valid.txt (new file, 1068 lines)
File diff suppressed because it is too large
@@ -1,954 +0,0 @@
|
||||
from hashlib import new
|
||||
from re import DEBUG
|
||||
|
||||
import contextlib
|
||||
import sys
|
||||
|
||||
from collections import Counter
|
||||
from multiprocessing import Pool
|
||||
from torch._C import HOIST_CONV_PACKED_PARAMS
|
||||
from torch.utils.data import Dataset, Sampler, IterableDataset
|
||||
from collections import defaultdict
|
||||
from functools import partial
|
||||
from multiprocessing import Pool
|
||||
import os
|
||||
import random
|
||||
import json
|
||||
import torch
|
||||
import copy
|
||||
import numpy as np
|
||||
import pickle
|
||||
from tqdm import tqdm
|
||||
from dataclasses import dataclass, asdict, replace
|
||||
import inspect
|
||||
|
||||
from transformers.models.auto.tokenization_auto import AutoTokenizer
|
||||
|
||||
from models.utils import get_entity_spans_pre_processing
|
||||
import pyximport
|
||||
|
||||
pyximport.install(setup_args={'include_dirs': np.get_include()})
|
||||
import data.algos as algos
|
||||
|
||||
def lmap(a, b):
    return list(map(a, b))  # a is a function, b is a list of values; returns the list of mapped values

def cache_results(_cache_fp, _refresh=False, _verbose=1):
    r"""
    cache_results is the decorator used in fastNLP to cache data. The example below shows how to use it::

        import time
        import numpy as np
        from fastNLP import cache_results

        @cache_results('cache.pkl')
        def process_data():
            # some relatively expensive work, e.g. reading and preprocessing data; time.sleep() stands in for it here
            time.sleep(1)
            return np.random.randint(10, size=(5,))

        start_time = time.time()
        print("res =", process_data())
        print(time.time() - start_time)

        start_time = time.time()
        print("res =", process_data())
        print(time.time() - start_time)

        # The output looks like the following; the two results are identical and the second call takes almost no time
        # Save cache to cache.pkl.
        # res = [5 4 9 1 8]
        # 1.0042750835418701
        # Read cache from cache.pkl.
        # res = [5 4 9 1 8]
        # 0.0040721893310546875

    The second run takes only about 0.0001s, because it reads the result directly from cache.pkl instead of redoing the preprocessing::

        # Continuing the example above: to generate a separate cache, e.g. for another dataset, call it like this
        process_data(_cache_fp='cache2.pkl')  # does not affect the previous 'cache.pkl'

    The _cache_fp above is a parameter recognised by cache_results; it caches/reads data at 'cache2.pkl', overriding the default
    'cache.pkl'. Decorating a function with @cache_results() adds three parameters [_cache_fp, _refresh, _verbose].
    These three parameters are not passed into your function, so your own function must not use these names::

        process_data(_cache_fp='cache2.pkl', _refresh=True)  # force regeneration of the preprocessing cache
        # _verbose controls logging: 0 prints nothing; 1 reports whether the cache was read or newly generated

    :param str _cache_fp: where to cache the returned result, or where to read the cache from. If None, cache_results has no
        effect, unless _cache_fp is passed at call time.
    :param bool _refresh: whether to regenerate the cache.
    :param int _verbose: whether to print cache information.
    :return:
    """
|
||||
|
||||
def wrapper_(func):
|
||||
signature = inspect.signature(func)
|
||||
for key, _ in signature.parameters.items():
|
||||
if key in ('_cache_fp', '_refresh', '_verbose'):
|
||||
raise RuntimeError("The function decorated by cache_results cannot have keyword `{}`.".format(key))
|
||||
|
||||
def wrapper(*args, **kwargs):
|
||||
my_args = args[0]
|
||||
mode = args[-1]
|
||||
if '_cache_fp' in kwargs:
|
||||
cache_filepath = kwargs.pop('_cache_fp')
|
||||
assert isinstance(cache_filepath, str), "_cache_fp can only be str."
|
||||
else:
|
||||
cache_filepath = _cache_fp
|
||||
if '_refresh' in kwargs:
|
||||
refresh = kwargs.pop('_refresh')
|
||||
assert isinstance(refresh, bool), "_refresh can only be bool."
|
||||
else:
|
||||
refresh = _refresh
|
||||
if '_verbose' in kwargs:
|
||||
verbose = kwargs.pop('_verbose')
|
||||
assert isinstance(verbose, int), "_verbose can only be integer."
|
||||
else:
|
||||
verbose = _verbose
|
||||
refresh_flag = True
|
||||
|
||||
model_name = my_args.model_name_or_path.split("/")[-1]
|
||||
is_pretrain = my_args.pretrain
|
||||
cache_filepath = os.path.join(my_args.data_dir, f"cached_{mode}_features{model_name}_pretrain{is_pretrain}_faiss{my_args.faiss_init}_seqlength{my_args.max_seq_length}_{my_args.litmodel_class}.pkl")
|
||||
refresh = my_args.overwrite_cache
|
||||
|
||||
if cache_filepath is not None and refresh is False:
|
||||
# load data
|
||||
if os.path.exists(cache_filepath):
|
||||
with open(cache_filepath, 'rb') as f:
|
||||
results = pickle.load(f)
|
||||
if verbose == 1:
|
||||
logger.info("Read cache from {}.".format(cache_filepath))
|
||||
refresh_flag = False
|
||||
|
||||
if refresh_flag:
|
||||
results = func(*args, **kwargs)
|
||||
if cache_filepath is not None:
|
||||
if results is None:
|
||||
raise RuntimeError("The return value is None. Delete the decorator.")
|
||||
with open(cache_filepath, 'wb') as f:
|
||||
pickle.dump(results, f)
|
||||
logger.info("Save cache to {}.".format(cache_filepath))
|
||||
|
||||
return results
|
||||
|
||||
return wrapper
|
||||
|
||||
return wrapper_
|
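# Editor's note (illustrative, not part of the original file): a minimal, hypothetical usage sketch of the
# cache_results decorator above. This variant rebuilds the cache path from the first positional argument, so
# the decorated function must receive an args-like object exposing model_name_or_path, pretrain, data_dir,
# faiss_init, max_seq_length, litmodel_class and overwrite_cache, with the split name ("train" / "dev" /
# "test") as the last positional argument; all names and paths below are assumptions.
#
#   from argparse import Namespace
#
#   @cache_results(_cache_fp="./dataset")
#   def build_features(args, processor, mode):
#       ...  # expensive preprocessing; the result is pickled under args.data_dir
#
#   toy_args = Namespace(model_name_or_path="bert-base-uncased", pretrain=0,
#                        data_dir="./dataset/FB15k-237", faiss_init=False,
#                        max_seq_length=64, litmodel_class="TransformerLitModel",
#                        overwrite_cache=False)
#   features = build_features(toy_args, None, "train")   # a second identical call reads the pickle instead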
||||
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
|
||||
TensorDataset)
|
||||
from torch.utils.data.distributed import DistributedSampler
|
||||
from tqdm import tqdm, trange
|
||||
|
||||
# from torch.nn import CrossEntropyLoss, MSELoss
|
||||
# from scipy.stats import pearsonr, spearmanr
|
||||
# from sklearn.metrics import matthews_corrcoef, f1_scoreclass
|
||||
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class InputExample(object):
|
||||
"""A single training/test example for simple sequence classification."""
|
||||
|
||||
def __init__(self, guid, text_a, text_b=None, text_c=None, text_d=None, label=None, real_label=None, en=None, en_id=None, rel=None, text_d_id=None, graph_inf=None):
|
||||
"""Constructs a InputExample.
|
||||
|
||||
Args:
|
||||
guid: Unique id for the example.
|
||||
text_a: string. The untokenized text of the first sequence. For single
|
||||
sequence tasks, only this sequence must be specified.
|
||||
text_b: (Optional) string. The untokenized text of the second sequence.
|
||||
Only must be specified for sequence pair tasks.
|
||||
text_c: (Optional) string. The untokenized text of the third sequence.
|
||||
Only must be specified for sequence triple tasks.
|
||||
label: (Optional) list. The filtered candidate entity ids for this example.
|
||||
"""
|
||||
self.guid = guid
|
||||
self.text_a = text_a
|
||||
self.text_b = text_b
|
||||
self.text_c = text_c
|
||||
self.text_d = text_d
|
||||
self.label = label
|
||||
self.real_label = real_label
|
||||
self.en = en
|
||||
self.rel = rel # rel id
|
||||
self.text_d_id = text_d_id
|
||||
self.graph_inf = graph_inf
|
||||
self.en_id = en_id
|
||||
|
||||
|
||||
@dataclass
|
||||
class InputFeatures:
|
||||
"""A single set of features of data."""
|
||||
|
||||
input_ids: torch.Tensor
|
||||
attention_mask: torch.Tensor
|
||||
labels: torch.Tensor = None
|
||||
label: torch.Tensor = None
|
||||
en: torch.Tensor = 0
|
||||
rel: torch.Tensor = 0
|
||||
pos: torch.Tensor = 0
|
||||
graph: torch.Tensor = 0
|
||||
distance_attention: torch.Tensor = 0
|
||||
# attention_bias: torch.Tensor = 0
|
||||
|
||||
|
||||
class DataProcessor(object):
|
||||
"""Base class for data converters for sequence classification data sets."""
|
||||
|
||||
def get_train_examples(self, data_dir):
|
||||
"""Gets a collection of `InputExample`s for the train set."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_dev_examples(self, data_dir):
|
||||
"""Gets a collection of `InputExample`s for the dev set."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_labels(self, data_dir):
|
||||
"""Gets the list of labels for this data set."""
|
||||
raise NotImplementedError()
|
||||
|
||||
@classmethod
|
||||
def _read_tsv(cls, input_file, quotechar=None):
|
||||
"""Reads a tab separated value file."""
|
||||
with open(input_file, "r", encoding="utf-8") as f:
|
||||
reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
|
||||
lines = []
|
||||
for line in reader:
|
||||
if sys.version_info[0] == 2:
|
||||
line = list(unicode(cell, 'utf-8') for cell in line)
|
||||
lines.append(line)
|
||||
return lines
|
||||
|
||||
import copy
|
||||
|
||||
|
||||
def solve_get_knowledge_store(line, set_type="train", pretrain=1):
|
||||
"""
|
||||
use the LM to get the entity embedding.
|
||||
Transductive: triples + text description
|
||||
Inductive: text description
|
||||
|
||||
"""
|
||||
examples = []
|
||||
|
||||
head_ent_text = ent2text[line[0]]
|
||||
tail_ent_text = ent2text[line[2]]
|
||||
relation_text = rel2text[line[1]]
|
||||
|
||||
i=0
|
||||
|
||||
a = tail_filter_entities["\t".join([line[0],line[1]])]
|
||||
b = head_filter_entities["\t".join([line[2],line[1]])]
|
||||
|
||||
guid = "%s-%s" % (set_type, i)
|
||||
text_a = head_ent_text
|
||||
text_b = relation_text
|
||||
text_c = tail_ent_text
|
||||
|
||||
# use the description of c to predict A
|
||||
examples.append(
|
||||
InputExample(guid=guid, text_a="[PAD]", text_b=text_b + "[PAD]", text_c = "[PAD]" + " " + text_c, label=lmap(lambda x: ent2id[x], b), real_label=ent2id[line[0]], en=[ent2id[line[0]], rel2id[line[1]], ent2id[line[2]]], rel=0)
|
||||
)
|
||||
examples.append(
|
||||
InputExample(guid=guid, text_a="[PAD]", text_b=text_b + "[PAD]", text_c = "[PAD]" + " " + text_a, label=lmap(lambda x: ent2id[x], b), real_label=ent2id[line[2]], en=[ent2id[line[0]], rel2id[line[1]], ent2id[line[2]]], rel=0)
|
||||
)
|
||||
return examples
|
||||
|
||||
|
||||
def solve(line, set_type="train", pretrain=1, max_triplet=32):
|
||||
examples = []
|
||||
|
||||
head_ent_text = ent2text[line[0]]
|
||||
tail_ent_text = ent2text[line[2]]
|
||||
relation_text = rel2text[line[1]]
|
||||
|
||||
i=0
|
||||
|
||||
a = tail_filter_entities["\t".join([line[0],line[1]])]
|
||||
b = head_filter_entities["\t".join([line[2],line[1]])]
|
||||
|
||||
guid = "%s-%s" % (set_type, i)
|
||||
text_a = head_ent_text
|
||||
text_b = relation_text
|
||||
text_c = tail_ent_text
|
||||
|
||||
|
||||
if pretrain:
|
||||
text_a_tokens = text_a.split()
|
||||
for i in range(10):
|
||||
st = random.randint(0, len(text_a_tokens))
|
||||
examples.append(
|
||||
InputExample(guid=guid, text_a="[MASK]", text_b=" ".join(text_a_tokens[st:min(st+64, len(text_a_tokens))]), text_c = "", label=ent2id[line[0]], real_label=ent2id[line[0]], en=0, rel=0)
|
||||
)
|
||||
examples.append(
|
||||
InputExample(guid=guid, text_a="[MASK]", text_b=text_a, text_c = "", label=ent2id[line[0]], real_label=ent2id[line[0]], en=0, rel=0)
|
||||
)
|
||||
# examples.append(
|
||||
# InputExample(guid=guid, text_a="[MASK]", text_b=text_c, text_c = "", label=ent2id[line[2]], real_label=ent2id[line[2]], en=0, rel=0)
|
||||
# )
|
||||
else:
|
||||
# The main point here is to wrap text_c: it is no longer the original text but the graph of the corresponding
# subgraph (the variable graph_seq). If the tail entity is masked, graph_seq is appended after text_c.
|
||||
# masked_head_seq = []
|
||||
# masked_tail_seq = []
|
||||
# masked_tail_graph_list = masked_tail_neighbor["\t".join([line[0],line[1]])]
|
||||
# masked_head_graph_list = masked_head_neighbor["\t".join([line[2],line[1]])]
|
||||
# for item in masked_head_graph_list:
|
||||
# masked_head_seq.append(ent2id[item[0]])
|
||||
# masked_head_seq.append(rel2id[item[1]])
|
||||
# masked_head_seq.append(ent2id[item[2]])
|
||||
|
||||
# for item in masked_tail_graph_list:
|
||||
# masked_tail_seq.append(ent2id[item[0]])
|
||||
# masked_tail_seq.append(rel2id[item[1]])
|
||||
# masked_tail_seq.append(ent2id[item[2]])
|
||||
|
||||
masked_head_seq = set()
|
||||
masked_head_seq_id = set()
|
||||
masked_tail_seq = set()
|
||||
masked_tail_seq_id = set()
|
||||
|
||||
masked_tail_graph_list = masked_tail_neighbor["\t".join([line[0],line[1]])] if len(masked_tail_neighbor["\t".join([line[0],line[1]])]) < max_triplet else \
|
||||
random.sample(masked_tail_neighbor["\t".join([line[0],line[1]])], max_triplet)
|
||||
masked_head_graph_list = masked_head_neighbor["\t".join([line[2],line[1]])] if len(masked_head_neighbor["\t".join([line[2],line[1]])]) < max_triplet else \
|
||||
random.sample(masked_head_neighbor["\t".join([line[2],line[1]])], max_triplet)
|
||||
# masked_tail_graph_list = masked_tail_neighbor["\t".join([line[0],line[1]])][:16]
|
||||
# masked_head_graph_list = masked_head_neighbor["\t".join([line[2],line[1]])][:16]
|
||||
for item in masked_head_graph_list:
|
||||
masked_head_seq.add(item[0])
|
||||
masked_head_seq.add(item[1])
|
||||
masked_head_seq.add(item[2])
|
||||
masked_head_seq_id.add(ent2id[item[0]])
|
||||
masked_head_seq_id.add(rel2id[item[1]])
|
||||
masked_head_seq_id.add(ent2id[item[2]])
|
||||
|
||||
for item in masked_tail_graph_list:
|
||||
masked_tail_seq.add(item[0])
|
||||
masked_tail_seq.add(item[1])
|
||||
masked_tail_seq.add(item[2])
|
||||
masked_tail_seq_id.add(ent2id[item[0]])
|
||||
masked_tail_seq_id.add(rel2id[item[1]])
|
||||
masked_tail_seq_id.add(ent2id[item[2]])
|
||||
# print(masked_tail_seq)
|
||||
masked_head_seq = masked_head_seq.difference({line[0]})
|
||||
masked_head_seq = masked_head_seq.difference({line[2]})
|
||||
masked_head_seq = masked_head_seq.difference({line[1]})
|
||||
masked_head_seq_id = masked_head_seq_id.difference({ent2id[line[0]]})
|
||||
masked_head_seq_id = masked_head_seq_id.difference({rel2id[line[1]]})
|
||||
masked_head_seq_id = masked_head_seq_id.difference({ent2id[line[2]]})
|
||||
|
||||
masked_tail_seq = masked_tail_seq.difference({line[0]})
|
||||
masked_tail_seq = masked_tail_seq.difference({line[2]})
|
||||
masked_tail_seq = masked_tail_seq.difference({line[1]})
|
||||
masked_tail_seq_id = masked_tail_seq_id.difference({ent2id[line[0]]})
|
||||
masked_tail_seq_id = masked_tail_seq_id.difference({rel2id[line[1]]})
|
||||
masked_tail_seq_id = masked_tail_seq_id.difference({ent2id[line[2]]})
|
||||
# examples.append(
|
||||
# InputExample(guid=guid, text_a="[MASK]", text_b=' '.join(text_b.split(' ')[:16]) + " [PAD]", text_c = "[PAD]" + " " + ' '.join(text_c.split(' ')[:16]), text_d = masked_head_seq, label=lmap(lambda x: ent2id[x], b), real_label=ent2id[line[0]], en=[rel2id[line[1]], ent2id[line[2]]], rel=rel2id[line[1]]))
|
||||
# examples.append(
|
||||
# InputExample(guid=guid, text_a="[PAD] ", text_b=' '.join(text_b.split(' ')[:16]) + " [PAD]", text_c = "[MASK]" +" " + ' '.join(text_a.split(' ')[:16]), text_d = masked_tail_seq, label=lmap(lambda x: ent2id[x], a), real_label=ent2id[line[2]], en=[ent2id[line[0]], rel2id[line[1]]], rel=rel2id[line[1]]))
|
||||
examples.append(
|
||||
InputExample(guid=guid, text_a="[MASK]", text_b="[PAD]", text_c = "[PAD]", text_d = list(masked_head_seq), label=lmap(lambda x: ent2id[x], b), real_label=ent2id[line[0]], en=[line[1], line[2]], en_id = [rel2id[line[1]], ent2id[line[2]]], rel=rel2id[line[1]], text_d_id = list(masked_head_seq_id), graph_inf = masked_head_graph_list))
|
||||
examples.append(
|
||||
InputExample(guid=guid, text_a="[PAD]", text_b="[PAD]", text_c = "[MASK]", text_d = list(masked_tail_seq), label=lmap(lambda x: ent2id[x], a), real_label=ent2id[line[2]], en=[line[0], line[1]], en_id = [ent2id[line[0]], rel2id[line[1]]], rel=rel2id[line[1]], text_d_id = list(masked_tail_seq_id), graph_inf = masked_tail_graph_list))
|
||||
return examples
|
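# Editor's note (illustrative, not part of the original file): once filter_init() has installed the
# module-level lookup tables, each triple fed to solve() yields two InputExamples - one with the head masked
# and one with the tail masked - and the sampled neighbour subgraph is carried along as text_d / text_d_id /
# graph_inf. The entity and relation ids below are hypothetical.
#
#   head_masked, tail_masked = solve(["Q1", "P131", "Q2"], set_type="train",
#                                    pretrain=0, max_triplet=32)
#   head_masked.real_label == ent2id["Q1"]   # target: recover the masked head
#   tail_masked.real_label == ent2id["Q2"]   # target: recover the masked tail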
||||
|
||||
def filter_init(head, tail, t1,t2, ent2id_, ent2token_, rel2id_, masked_head_neighbor_, masked_tail_neighbor_, rel2token_):
|
||||
global head_filter_entities
|
||||
global tail_filter_entities
|
||||
global ent2text
|
||||
global rel2text
|
||||
global ent2id
|
||||
global ent2token
|
||||
global rel2id
|
||||
global masked_head_neighbor
|
||||
global masked_tail_neighbor
|
||||
global rel2token
|
||||
|
||||
head_filter_entities = head
|
||||
tail_filter_entities = tail
|
||||
ent2text =t1
|
||||
rel2text =t2
|
||||
ent2id = ent2id_
|
||||
ent2token = ent2token_
|
||||
rel2id = rel2id_
|
||||
masked_head_neighbor = masked_head_neighbor_
|
||||
masked_tail_neighbor = masked_tail_neighbor_
|
||||
rel2token = rel2token_
|
||||
|
||||
def delete_init(ent2text_):
|
||||
global ent2text
|
||||
ent2text = ent2text_
|
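# Editor's note (illustrative, not part of the original file): filter_init/delete_init follow the usual
# multiprocessing "initializer" pattern - the large lookup tables are installed once as module globals in each
# worker instead of being pickled with every task. A hypothetical sketch, with made-up variable names:
#
#   from multiprocessing import Pool
#   pool = Pool(4, initializer=filter_init,
#               initargs=(head_filter_entities, tail_filter_entities, ent2text, rel2text,
#                         ent2id, ent2token, rel2id,
#                         masked_head_neighbor, masked_tail_neighbor, rel2token))
#   examples = pool.map(solve, train_triples)
#
# In _create_examples below the tables are installed in-process by calling filter_init(...) directly and using
# a plain map(), but the same globals are what solve() / solve_get_knowledge_store() read.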
||||
|
||||
|
||||
class KGProcessor(DataProcessor):
|
||||
"""Processor for knowledge graph data set."""
|
||||
def __init__(self, tokenizer, args):
|
||||
self.labels = set()
|
||||
self.tokenizer = tokenizer
|
||||
self.args = args
|
||||
self.entity_path = os.path.join(args.data_dir, "entity2textlong.txt") if os.path.exists(os.path.join(args.data_dir, 'entity2textlong.txt')) \
|
||||
else os.path.join(args.data_dir, "entity2text.txt")
|
||||
|
||||
def get_train_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
return self._create_examples(
|
||||
self._read_tsv(os.path.join(data_dir, "train.tsv")), "train", data_dir, self.args)
|
||||
|
||||
def get_dev_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
return self._create_examples(
|
||||
self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev", data_dir, self.args)
|
||||
|
||||
def get_test_examples(self, data_dir, chunk=""):
|
||||
"""See base class."""
|
||||
return self._create_examples(
|
||||
self._read_tsv(os.path.join(data_dir, f"test{chunk}.tsv")), "test", data_dir, self.args)
|
||||
|
||||
def get_relations(self, data_dir):
|
||||
"""Gets all labels (relations) in the knowledge graph."""
|
||||
# return list(self.labels)
|
||||
with open(os.path.join(data_dir, "relations.txt"), 'r') as f:
|
||||
lines = f.readlines()
|
||||
relations = []
|
||||
for line in lines:
|
||||
relations.append(line.strip().split('\t')[0])
|
||||
rel2token = {ent : f"[RELATION_{i}]" for i, ent in enumerate(relations)}
|
||||
return list(rel2token.values())
|
||||
|
||||
def get_labels(self, data_dir):
|
||||
"""Gets all labels (0, 1) for triples in the knowledge graph."""
|
||||
relation = []
|
||||
with open(os.path.join(data_dir, "relation2text.txt"), 'r') as f:
|
||||
lines = f.readlines()
|
||||
entities = []
|
||||
for line in lines:
|
||||
relation.append(line.strip().split("\t")[-1])
|
||||
return relation
|
||||
|
||||
def get_entities(self, data_dir):
|
||||
"""Gets all entities in the knowledge graph."""
|
||||
with open(self.entity_path, 'r') as f:
|
||||
lines = f.readlines()
|
||||
entities = []
|
||||
for line in lines:
|
||||
entities.append(line.strip().split("\t")[0])
|
||||
|
||||
ent2token = {ent : f"[ENTITY_{i}]" for i, ent in enumerate(entities)}
|
||||
return list(ent2token.values())
|
||||
|
||||
def get_train_triples(self, data_dir):
|
||||
"""Gets training triples."""
|
||||
return self._read_tsv(os.path.join(data_dir, "train.tsv"))
|
||||
|
||||
def get_dev_triples(self, data_dir):
|
||||
"""Gets validation triples."""
|
||||
return self._read_tsv(os.path.join(data_dir, "dev.tsv"))
|
||||
|
||||
def get_test_triples(self, data_dir, chunk=""):
|
||||
"""Gets test triples."""
|
||||
return self._read_tsv(os.path.join(data_dir, f"test{chunk}.tsv"))
|
||||
|
||||
def _create_examples(self, lines, set_type, data_dir, args):
|
||||
"""Creates examples for the training and dev sets."""
|
||||
# entity to text
|
||||
ent2text = {}
|
||||
ent2text_with_type = {}
|
||||
with open(self.entity_path, 'r') as f:
|
||||
ent_lines = f.readlines()
|
||||
for line in ent_lines:
|
||||
temp = line.strip().split('\t')
|
||||
try:
|
||||
end = temp[1]#.find(',')
|
||||
if "wiki" in data_dir:
|
||||
assert "Q" in temp[0]
|
||||
ent2text[temp[0]] = temp[1].replace("\\n", " ").replace("\\", "") #[:end]
|
||||
except IndexError:
|
||||
# continue
|
||||
end = " "#.find(',')
|
||||
if "wiki" in data_dir:
|
||||
assert "Q" in temp[0]
|
||||
ent2text[temp[0]] = end #[:end]
|
||||
|
||||
entities = list(ent2text.keys())
|
||||
ent2token = {ent : f"[ENTITY_{i}]" for i, ent in enumerate(entities)}
|
||||
ent2id = {ent : i for i, ent in enumerate(entities)}
|
||||
|
||||
rel2text = {}
|
||||
with open(os.path.join(data_dir, "relation2text.txt"), 'r') as f:
|
||||
rel_lines = f.readlines()
|
||||
for line in rel_lines:
|
||||
temp = line.strip().split('\t')
|
||||
rel2text[temp[0]] = temp[1]
|
||||
|
||||
relation_names = {}
|
||||
with open(os.path.join(data_dir, "relations.txt"), "r") as file:
|
||||
for line in file.readlines():
|
||||
t = line.strip()
|
||||
relation_names[t] = rel2text[t]
|
||||
|
||||
tmp_lines = []
|
||||
not_in_text = 0
|
||||
for line in tqdm(lines, desc="delete entities without text name."):
|
||||
if (line[0] not in ent2text) or (line[2] not in ent2text) or (line[1] not in rel2text):
|
||||
not_in_text += 1
|
||||
continue
|
||||
tmp_lines.append(line)
|
||||
lines = tmp_lines
|
||||
print(f"total entity not in text : {not_in_text} ")
|
||||
|
||||
relations = list(rel2text.keys())
|
||||
rel2token = {rel : f"[RELATION_{i}]" for i, rel in enumerate(relations)}
|
||||
# relation name -> id; relation ids are offset by the number of entities so they share one id space
|
||||
num_entities = len(self.get_entities(args.data_dir))
|
||||
rel2id = {w:i+num_entities for i,w in enumerate(relation_names.keys())}
|
||||
|
||||
|
||||
with open(os.path.join(data_dir, "masked_head_neighbor.txt"), 'r') as file:
|
||||
masked_head_neighbor = json.load(file)
|
||||
|
||||
with open(os.path.join(data_dir, "masked_tail_neighbor.txt"), 'r') as file:
|
||||
masked_tail_neighbor = json.load(file)
|
||||
|
||||
examples = []
|
||||
# filter dictionaries: head_filter_entities maps (tail, relation) -> valid head entities,
# tail_filter_entities maps (head, relation) -> valid tail entities
|
||||
head_filter_entities = defaultdict(list)
|
||||
tail_filter_entities = defaultdict(list)
|
||||
|
||||
dataset_list = ["train.tsv", "dev.tsv", "test.tsv"]
|
||||
# in training, only use the train triples
|
||||
if set_type == "train" and not args.pretrain: dataset_list = dataset_list[0:1]
|
||||
for m in dataset_list:
|
||||
with open(os.path.join(data_dir, m), 'r') as file:
|
||||
train_lines = file.readlines()
|
||||
for idx in range(len(train_lines)):
|
||||
train_lines[idx] = train_lines[idx].strip().split("\t")
|
||||
|
||||
for line in train_lines:
|
||||
tail_filter_entities["\t".join([line[0], line[1]])].append(line[2])
|
||||
head_filter_entities["\t".join([line[2], line[1]])].append(line[0])
|
||||
|
||||
max_head_entities = max(len(_) for _ in head_filter_entities.values())
|
||||
max_tail_entities = max(len(_) for _ in tail_filter_entities.values())
|
||||
|
||||
# use bce loss, ignore the mlm
|
||||
if set_type == "train" and args.bce:
|
||||
lines = []
|
||||
for k, v in tail_filter_entities.items():
|
||||
h, r = k.split('\t')
|
||||
t = v[0]
|
||||
lines.append([h, r, t])
|
||||
for k, v in head_filter_entities.items():
|
||||
t, r = k.split('\t')
|
||||
h = v[0]
|
||||
lines.append([h, r, t])
|
||||
|
||||
|
||||
# for pretraining, enumerate every entity so that its mask embedding can be obtained
|
||||
if args.pretrain:
|
||||
rel = list(rel2text.keys())[0]
|
||||
lines = []
|
||||
for k in ent2text.keys():
|
||||
lines.append([k, rel, k])
|
||||
|
||||
print(f"max number of filter entities : {max_head_entities} {max_tail_entities}")
|
||||
# Pass the subgraph information into filter_init (initialised once, with fixed subgraphs); it is stored in
# global variables and read inside solve.
|
||||
from os import cpu_count
|
||||
threads = min(1, cpu_count())  # note: this always evaluates to 1 and is not used below
|
||||
filter_init(head_filter_entities, tail_filter_entities,ent2text, rel2text, ent2id, ent2token, rel2id, masked_head_neighbor, masked_tail_neighbor, rel2token
|
||||
)
|
||||
|
||||
if hasattr(args, "faiss_init") and args.faiss_init:
|
||||
annotate_ = partial(
|
||||
solve_get_knowledge_store,
|
||||
pretrain=self.args.pretrain
|
||||
)
|
||||
else:
|
||||
annotate_ = partial(
|
||||
solve,
|
||||
pretrain=self.args.pretrain,
|
||||
max_triplet=self.args.max_triplet
|
||||
)
|
||||
examples = list(
|
||||
tqdm(
|
||||
map(annotate_, lines),
|
||||
total=len(lines),
|
||||
desc="convert text to examples"
|
||||
)
|
||||
)
|
||||
|
||||
tmp_examples = []
|
||||
for e in examples:
|
||||
for ee in e:
|
||||
tmp_examples.append(ee)
|
||||
examples = tmp_examples
|
||||
# delete vars
|
||||
del head_filter_entities, tail_filter_entities, ent2text, rel2text, ent2id, ent2token, rel2id
|
||||
return examples
|
||||
|
||||
class Verbalizer(object):
|
||||
def __init__(self, args):
|
||||
if "WN18RR" in args.data_dir:
|
||||
self.mode = "WN18RR"
|
||||
elif "FB15k" in args.data_dir:
|
||||
self.mode = "FB15k"
|
||||
elif "umls" in args.data_dir:
|
||||
self.mode = "umls"
|
||||
elif "codexs" in args.data_dir:
|
||||
self.mode = "codexs"
|
||||
elif "FB13" in args.data_dir:
|
||||
self.mode = "FB13"
|
||||
elif "WN11" in args.data_dir:
|
||||
self.mode = "WN11"
|
||||
|
||||
|
||||
def _convert(self, head, relation, tail):
|
||||
if self.mode == "umls":
|
||||
return f"The {relation} {head} is "
|
||||
|
||||
return f"{head} {relation}"
|
||||
|
||||
|
||||
class KGCDataset(Dataset):
|
||||
def __init__(self, features):
|
||||
self.features = features
|
||||
|
||||
def __getitem__(self, index):
|
||||
return self.features[index]
|
||||
|
||||
def __len__(self):
|
||||
return len(self.features)
|
||||
|
||||
def convert_examples_to_features_init(tokenizer_for_convert):
|
||||
global tokenizer
|
||||
tokenizer = tokenizer_for_convert
|
||||
|
||||
def convert_examples_to_features(example, max_seq_length, mode, pretrain=1):
|
||||
"""Loads a data file into a list of `InputBatch`s."""
|
||||
text_a = " ".join(example.text_a.split()[:128])
|
||||
text_b = " ".join(example.text_b.split()[:128])
|
||||
text_c = " ".join(example.text_c.split()[:128])
|
||||
|
||||
if pretrain:
|
||||
input_text_a = text_a
|
||||
input_text_b = text_b
|
||||
else:
|
||||
input_text_a = " ".join([text_a, text_b])
|
||||
input_text_b = text_c
|
||||
|
||||
|
||||
inputs = tokenizer(
|
||||
input_text_a,
|
||||
input_text_b,
|
||||
truncation="longest_first",
|
||||
max_length=max_seq_length,
|
||||
padding="longest",
|
||||
add_special_tokens=True,
|
||||
)
|
||||
# assert tokenizer.mask_token_id in inputs.input_ids, "mask token must in input"
|
||||
|
||||
features = asdict(InputFeatures(input_ids=inputs["input_ids"],
|
||||
attention_mask=inputs['attention_mask'],
|
||||
labels=torch.tensor(example.label),
|
||||
label=torch.tensor(example.real_label)
|
||||
)
|
||||
)
|
||||
return features
|
||||
|
||||
|
||||
def _truncate_seq_pair(tokens_a, tokens_b, max_length):
|
||||
"""Truncates a sequence pair in place to the maximum length."""
|
||||
|
||||
# This is a simple heuristic which will always truncate the longer sequence
|
||||
# one token at a time. This makes more sense than truncating an equal percent
|
||||
# of tokens from each, since if one sequence is very short then each token
|
||||
# that's truncated likely contains more information than a longer sequence.
|
||||
while True:
|
||||
total_length = len(tokens_a) + len(tokens_b)
|
||||
if total_length <= max_length:
|
||||
break
|
||||
if len(tokens_a) > len(tokens_b):
|
||||
tokens_a.pop()
|
||||
else:
|
||||
tokens_b.pop()
|
||||
|
||||
def _truncate_seq_triple(tokens_a, tokens_b, tokens_c, max_length):
|
||||
"""Truncates a sequence triple in place to the maximum length."""
|
||||
|
||||
# This is a simple heuristic which will always truncate the longer sequence
|
||||
# one token at a time. This makes more sense than truncating an equal percent
|
||||
# of tokens from each, since if one sequence is very short then each token
|
||||
# that's truncated likely contains more information than a longer sequence.
|
||||
while True:
|
||||
total_length = len(tokens_a) + len(tokens_b) + len(tokens_c)
|
||||
if total_length <= max_length:
|
||||
break
|
||||
if len(tokens_a) > len(tokens_b) and len(tokens_a) > len(tokens_c):
|
||||
tokens_a.pop()
|
||||
elif len(tokens_b) > len(tokens_a) and len(tokens_b) > len(tokens_c):
|
||||
tokens_b.pop()
|
||||
elif len(tokens_c) > len(tokens_a) and len(tokens_c) > len(tokens_b):
|
||||
tokens_c.pop()
|
||||
else:
|
||||
tokens_c.pop()
|
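# Editor's note (illustrative, not part of the original file): the heuristic above always trims the currently
# longest sequence, so short sequences survive at the expense of long ones; when the first two lists tie, the
# fall-through branch trims tokens_c.
#
#   a, b, c = list("aaaaaa"), list("bbb"), list("c")
#   _truncate_seq_triple(a, b, c, max_length=6)
#   (len(a), len(b), len(c))   # -> (3, 3, 0)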
||||
|
||||
|
||||
@cache_results(_cache_fp="./dataset")
|
||||
def get_dataset(args, processor, label_list, tokenizer, mode):
|
||||
|
||||
assert mode in ["train", "dev", "test"], "mode must be in train dev test!"
|
||||
|
||||
# use training data to construct the entity embedding
|
||||
combine_train_and_test = False
|
||||
if args.faiss_init and mode == "test" and not args.pretrain:
|
||||
mode = "train"
|
||||
if "ind" in args.data_dir: combine_train_and_test = True
|
||||
else:
|
||||
pass
|
||||
|
||||
if mode == "train":
|
||||
train_examples = processor.get_train_examples(args.data_dir)
|
||||
elif mode == "dev":
|
||||
train_examples = processor.get_dev_examples(args.data_dir)
|
||||
else:
|
||||
train_examples = processor.get_test_examples(args.data_dir)
|
||||
|
||||
if combine_train_and_test:
|
||||
logger.info("use all the dataset for getting the entity mask embedding in pretraining pretraining")
|
||||
logger.info("use all the dataset for getting the entity mask embedding in pretraining pretraining")
|
||||
train_examples = processor.get_test_examples(args.data_dir) + processor.get_train_examples(args.data_dir) + processor.get_dev_examples(args.data_dir)
|
||||
|
||||
from os import cpu_count
|
||||
with open(os.path.join(args.data_dir, f"examples_{mode}.txt"), 'w') as file:
|
||||
for line in train_examples:
|
||||
d = {}
|
||||
d.update(line.__dict__)
|
||||
file.write(json.dumps(d) + '\n')
|
||||
|
||||
# We should not need to call from_pretrained again here; the tokenizer that already has the added tokens must be reused.
|
||||
tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, use_fast=False)
|
||||
|
||||
features = []
|
||||
|
||||
file_inputs = [os.path.join(args.data_dir, f"examples_{mode}.txt")]
|
||||
file_outputs = [os.path.join(args.data_dir, f"features_{mode}.txt")]
|
||||
|
||||
with contextlib.ExitStack() as stack:
|
||||
inputs = [
|
||||
stack.enter_context(open(input, "r", encoding="utf-8"))
|
||||
if input != "-" else sys.stdin
|
||||
for input in file_inputs
|
||||
]
|
||||
outputs = [
|
||||
stack.enter_context(open(output, "w", encoding="utf-8"))
|
||||
if output != "-" else sys.stdout
|
||||
for output in file_outputs
|
||||
]
|
||||
|
||||
encoder = MultiprocessingEncoder(tokenizer, args)
|
||||
pool = Pool(16, initializer=encoder.initializer)
|
||||
encoder.initializer()
|
||||
encoded_lines = pool.imap(encoder.encode_lines, zip(*inputs), 1000)
|
||||
# encoded_lines = map(encoder.encode_lines, zip(*inputs))
|
||||
|
||||
stats = Counter()
|
||||
for i, (filt, enc_lines) in tqdm(enumerate(encoded_lines, start=1), total=len(train_examples)):
|
||||
if filt == "PASS":
|
||||
for enc_line, output_h in zip(enc_lines, outputs):
|
||||
features.append(eval(enc_line))
|
||||
# features.append(enc_line)
|
||||
# print(enc_line, file=output_h)
|
||||
else:
|
||||
stats["num_filtered_" + filt] += 1
|
||||
|
||||
for k, v in stats.most_common():
|
||||
print("[{}] filtered {} lines".format(k, v), file=sys.stderr)
|
||||
|
||||
for f_id, f in enumerate(features):
|
||||
en = features[f_id].pop("en")
|
||||
rel = features[f_id].pop("rel")
|
||||
graph = features[f_id].pop("graph")
|
||||
real_label = f['label']
|
||||
features[f_id]['distance_attention'] = torch.Tensor(features[f_id]['distance_attention'])
|
||||
|
||||
cnt = 0
|
||||
cnt_2 = 0
|
||||
if not isinstance(en, list): break
|
||||
|
||||
pos = 0
|
||||
for i,t in enumerate(f['input_ids']):
|
||||
if t == tokenizer.pad_token_id:
|
||||
features[f_id]['input_ids'][i] = en[cnt] + len(tokenizer)
|
||||
cnt += 1
|
||||
if t == tokenizer.unk_token_id:
|
||||
features[f_id]['input_ids'][i] = graph[cnt_2] + len(tokenizer)
|
||||
cnt_2 += 1
|
||||
if features[f_id]['input_ids'][i] == real_label + len(tokenizer):
|
||||
pos = i
|
||||
if cnt_2 == len(graph) and cnt == len(en): break
|
||||
# if the token equals [UNK], pop the next node id from the graph-node list and substitute it
|
||||
assert not (args.faiss_init and pos == 0)
|
||||
features[f_id]['pos'] = pos
|
||||
|
||||
# for i,t in enumerate(f['input_ids']):
|
||||
# if t == tokenizer.pad_token_id:
|
||||
# features[f_id]['input_ids'][i] = rel + len(tokenizer) + num_entities
|
||||
# break
|
||||
|
||||
|
||||
|
||||
features = KGCDataset(features)
|
||||
return features
|
||||
|
||||
|
||||
class MultiprocessingEncoder(object):
|
||||
def __init__(self, tokenizer, args):
|
||||
self.tokenizer = tokenizer
|
||||
self.pretrain = args.pretrain
|
||||
self.max_seq_length = args.max_seq_length
|
||||
|
||||
def initializer(self):
|
||||
global bpe
|
||||
bpe = self.tokenizer
|
||||
|
||||
def encode(self, line):
|
||||
global bpe
|
||||
ids = bpe.encode(line)
|
||||
return list(map(str, ids))
|
||||
|
||||
def decode(self, tokens):
|
||||
global bpe
|
||||
return bpe.decode(tokens)
|
||||
|
||||
def encode_lines(self, lines):
|
||||
"""
|
||||
Encode a set of lines. All lines will be encoded together.
|
||||
"""
|
||||
enc_lines = []
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if len(line) == 0:
|
||||
return ["EMPTY", None]
|
||||
# enc_lines.append(" ".join(tokens))
|
||||
enc_lines.append(json.dumps(self.convert_examples_to_features(example=eval(line))))
|
||||
# enc_lines.append(" ")
|
||||
# enc_lines.append("123")
|
||||
return ["PASS", enc_lines]
|
||||
|
||||
def decode_lines(self, lines):
|
||||
dec_lines = []
|
||||
for line in lines:
|
||||
tokens = map(int, line.strip().split())
|
||||
dec_lines.append(self.decode(tokens))
|
||||
return ["PASS", dec_lines]
|
||||
|
||||
def convert_examples_to_features(self, example):
|
||||
pretrain = self.pretrain
|
||||
max_seq_length = self.max_seq_length
|
||||
global bpe
|
||||
"""Loads a data file into a list of `InputBatch`s."""
|
||||
# tokens_a = tokenizer.tokenize(example.text_a)
|
||||
# tokens_b = tokenizer.tokenize(example.text_b)
|
||||
# tokens_c = tokenizer.tokenize(example.text_c)
|
||||
|
||||
# _truncate_seq_triple(tokens_a, tokens_b, tokens_c, max_length= max_seq_length)
|
||||
# text_a = " ".join(example['text_a'].split()[:128])
|
||||
# text_b = " ".join(example['text_b'].split()[:128])
|
||||
# text_c = " ".join(example['text_c'].split()[:128])
|
||||
|
||||
text_a = example['text_a']
|
||||
text_b = example['text_b']
|
||||
text_c = example['text_c']
|
||||
text_d = example['text_d']
|
||||
graph_list = example['graph_inf']
|
||||
|
||||
if pretrain:
|
||||
# the des of xxx is [MASK] .
|
||||
input_text = f"The description of {text_a} is that {text_b} ."
|
||||
inputs = bpe(
|
||||
input_text,
|
||||
truncation="longest_first",
|
||||
max_length=max_seq_length,
|
||||
padding="longest",
|
||||
add_special_tokens=True,
|
||||
)
|
||||
else:
|
||||
if text_a == "[MASK]":
|
||||
input_text_a = " ".join([text_a, text_b])
|
||||
input_text_b = text_c
|
||||
origin_triplet = ["MASK"] + example['en']
|
||||
graph_seq = ["MASK"] + example['en'] + text_d
|
||||
else:
|
||||
input_text_a = text_a
|
||||
input_text_b = " ".join([text_b, text_c])
|
||||
origin_triplet = example['en'] + ["MASK"]
|
||||
graph_seq = example['en'] + ["MASK"] + text_d
|
||||
# add the graph information: append one [UNK] placeholder per subgraph node
|
||||
input_text_b = " ".join(["[CLS]", input_text_a, input_text_b, bpe.unk_token * len(text_d)])
|
||||
|
||||
inputs = bpe(
|
||||
input_text_b,
|
||||
truncation="longest_first",
|
||||
max_length=max_seq_length,
|
||||
padding="longest",
|
||||
add_special_tokens=False,
|
||||
)
|
||||
# assert bpe.mask_token_id in inputs.input_ids, "mask token must in input"
|
||||
|
||||
# graph_seq = input_text_b[...]  read the graph-structure information back out
# [CLS] [ENTITY_13258] [RELATION_68] [MASK] [ENTITY_4] [RELATION_127] [ENTITY_8] [RELATION_9] [ENTITY_9011] [ENTITY_12477] [PAD] [PAD]
# obtain the graph structure information:
# first, solve() keeps a temporary variable storing all of the subgraph triples,
# and here graph_information = example['graph']
|
||||
new_rel = set()
|
||||
new_rel.add(tuple((origin_triplet[0], origin_triplet[1])))
|
||||
new_rel.add(tuple((origin_triplet[1], origin_triplet[0])))
|
||||
new_rel.add(tuple((origin_triplet[1], origin_triplet[2])))
|
||||
new_rel.add(tuple((origin_triplet[2], origin_triplet[1])))
|
||||
for triplet in graph_list:
|
||||
rel1, rel2, rel3, rel4 = tuple((triplet[0], triplet[1])), tuple((triplet[1], triplet[2])), tuple((triplet[1], triplet[0])), tuple((triplet[2], triplet[1]))
|
||||
new_rel.add(rel1)
|
||||
new_rel.add(rel2)
|
||||
new_rel.add(rel3)
|
||||
new_rel.add(rel4)
|
||||
# the triples gathered here are converted into the edge-pair set new_rel
|
||||
KGid2Graphid_map = defaultdict(int)
|
||||
for i in range(len(graph_seq)):
|
||||
KGid2Graphid_map[graph_seq[i]] = i
|
||||
|
||||
N = len(graph_seq)
|
||||
adj = torch.zeros([N, N], dtype=torch.bool)
|
||||
for item in list(new_rel):
|
||||
adj[KGid2Graphid_map[item[0]], KGid2Graphid_map[item[1]]] = True
|
||||
shortest_path_result, _ = algos.floyd_warshall(adj.numpy())
|
||||
max_dist = np.amax(shortest_path_result)
|
||||
# [PAD] positions, the filled-in [CLS] position, and any extra [SEP] that was introduced are all treated as [PAD]
# add an attention_bias: set the PAD positions to -inf and, before feeding the model, add it to the attention
# scores (so the model cannot attend to PAD)

# add this attention to huggingface's BertForMaskedLM (this may need to be looked into again)
|
||||
# attention_bias = torch.zero(N, N, dtype=torch.float)
|
||||
# attention_bias[torch.tensor(shortest_path_result == )]
|
||||
features = asdict(InputFeatures(input_ids=inputs["input_ids"],
|
||||
attention_mask=inputs['attention_mask'],
|
||||
labels=example['label'],
|
||||
label=example['real_label'],
|
||||
en=example['en_id'],
|
||||
rel=example['rel'],
|
||||
graph=example['text_d_id'],
|
||||
distance_attention = shortest_path_result.tolist(),
|
||||
)
|
||||
)
|
||||
return features
|
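# Editor's note (illustrative, not part of the original file): distance_attention above stores the pairwise
# hop distances of the serialized subgraph, computed by algos.floyd_warshall on the boolean adjacency matrix
# built from new_rel. A plain-NumPy stand-in with the same intent (the helper name and the 510 "unreachable"
# sentinel are assumptions, not the project's API):
#
#   import numpy as np
#
#   def toy_floyd_warshall(adj: np.ndarray) -> np.ndarray:
#       n = adj.shape[0]
#       dist = np.where(adj, 1, 510)       # 510 stands in for "unreachable"
#       np.fill_diagonal(dist, 0)
#       for k in range(n):                 # classic O(n^3) relaxation
#           dist = np.minimum(dist, dist[:, [k]] + dist[[k], :])
#       return dist
#
#   adj = np.zeros((4, 4), dtype=bool)     # toy chain 0-1-2-3
#   for i, j in [(0, 1), (1, 2), (2, 3)]:
#       adj[i, j] = adj[j, i] = True
#   toy_floyd_warshall(adj)[0, 3]          # -> 3 hops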
15
data/wikidata12k/about.txt
Normal file
@ -0,0 +1,15 @@
|
||||
# triples: 291818
|
||||
# entities: 12554
|
||||
# relations: 423
|
||||
# timesteps: 70
|
||||
# test triples: 19271
|
||||
# valid triples: 20208
|
||||
# train triples: 252339
|
||||
Measure method: N/A
|
||||
Target Size : 423
|
||||
Grow Factor: 0
|
||||
Shrink Factor: 4.0
|
||||
Epsilon Factor: 0
|
||||
Search method: N/A
|
||||
filter_dupes: inter
|
||||
nonames: False
|
12554
data/wikidata12k/entities.dict
Normal file
File diff suppressed because it is too large
Load Diff
423
data/wikidata12k/relations.dict
Normal file
@ -0,0 +1,423 @@
|
||||
0 P131[0-0]
|
||||
1 P131[1-1]
|
||||
2 P131[2-2]
|
||||
3 P131[3-3]
|
||||
4 P131[4-4]
|
||||
5 P131[5-5]
|
||||
6 P131[6-6]
|
||||
7 P131[7-7]
|
||||
8 P131[8-8]
|
||||
9 P131[9-9]
|
||||
10 P131[10-10]
|
||||
11 P131[11-11]
|
||||
12 P131[12-12]
|
||||
13 P131[13-13]
|
||||
14 P131[14-14]
|
||||
15 P131[15-15]
|
||||
16 P131[16-16]
|
||||
17 P131[17-17]
|
||||
18 P131[18-18]
|
||||
19 P131[19-19]
|
||||
20 P131[20-20]
|
||||
21 P131[21-21]
|
||||
22 P131[22-22]
|
||||
23 P131[23-23]
|
||||
24 P131[24-24]
|
||||
25 P131[25-25]
|
||||
26 P131[26-26]
|
||||
27 P131[27-27]
|
||||
28 P131[28-28]
|
||||
29 P131[29-29]
|
||||
30 P131[30-30]
|
||||
31 P131[31-31]
|
||||
32 P131[32-32]
|
||||
33 P131[33-33]
|
||||
34 P131[34-34]
|
||||
35 P131[35-35]
|
||||
36 P131[36-36]
|
||||
37 P131[37-37]
|
||||
38 P131[38-38]
|
||||
39 P131[39-39]
|
||||
40 P131[40-40]
|
||||
41 P131[41-41]
|
||||
42 P131[42-42]
|
||||
43 P131[43-43]
|
||||
44 P131[44-44]
|
||||
45 P131[45-45]
|
||||
46 P131[46-46]
|
||||
47 P131[47-47]
|
||||
48 P131[48-48]
|
||||
49 P131[49-49]
|
||||
50 P131[50-50]
|
||||
51 P131[51-51]
|
||||
52 P131[52-52]
|
||||
53 P131[53-53]
|
||||
54 P131[54-54]
|
||||
55 P131[55-55]
|
||||
56 P131[56-56]
|
||||
57 P131[57-57]
|
||||
58 P131[58-58]
|
||||
59 P131[59-59]
|
||||
60 P131[60-60]
|
||||
61 P131[61-61]
|
||||
62 P131[62-62]
|
||||
63 P131[63-63]
|
||||
64 P131[64-64]
|
||||
65 P131[65-65]
|
||||
66 P131[66-66]
|
||||
67 P131[67-67]
|
||||
68 P131[68-68]
|
||||
69 P131[69-69]
|
||||
70 P1435[65-65]
|
||||
71 P39[49-49]
|
||||
72 P39[50-50]
|
||||
73 P39[51-51]
|
||||
74 P39[52-52]
|
||||
75 P39[53-53]
|
||||
76 P39[54-54]
|
||||
77 P39[55-55]
|
||||
78 P39[56-56]
|
||||
79 P39[57-57]
|
||||
80 P39[58-58]
|
||||
81 P39[59-59]
|
||||
82 P39[60-60]
|
||||
83 P39[61-61]
|
||||
84 P39[62-62]
|
||||
85 P39[63-63]
|
||||
86 P39[64-64]
|
||||
87 P39[65-65]
|
||||
88 P39[66-66]
|
||||
89 P39[67-67]
|
||||
90 P39[68-68]
|
||||
91 P39[69-69]
|
||||
92 P54[40-40]
|
||||
93 P54[41-41]
|
||||
94 P54[42-42]
|
||||
95 P54[43-43]
|
||||
96 P54[44-44]
|
||||
97 P54[45-45]
|
||||
98 P54[46-46]
|
||||
99 P54[47-47]
|
||||
100 P54[48-48]
|
||||
101 P54[49-49]
|
||||
102 P54[50-50]
|
||||
103 P54[51-51]
|
||||
104 P54[52-52]
|
||||
105 P54[53-53]
|
||||
106 P54[54-54]
|
||||
107 P54[55-55]
|
||||
108 P54[56-56]
|
||||
109 P54[57-57]
|
||||
110 P54[58-58]
|
||||
111 P54[59-59]
|
||||
112 P54[60-60]
|
||||
113 P54[61-61]
|
||||
114 P54[62-62]
|
||||
115 P54[63-63]
|
||||
116 P54[64-64]
|
||||
117 P54[65-65]
|
||||
118 P54[66-66]
|
||||
119 P54[67-67]
|
||||
120 P54[68-68]
|
||||
121 P54[69-69]
|
||||
122 P31[0-0]
|
||||
123 P31[1-1]
|
||||
124 P31[2-2]
|
||||
125 P31[3-3]
|
||||
126 P31[4-4]
|
||||
127 P31[5-5]
|
||||
128 P31[6-6]
|
||||
129 P31[7-7]
|
||||
130 P31[8-8]
|
||||
131 P31[9-9]
|
||||
132 P31[10-10]
|
||||
133 P31[11-11]
|
||||
134 P31[12-12]
|
||||
135 P31[13-13]
|
||||
136 P31[14-14]
|
||||
137 P31[15-15]
|
||||
138 P31[16-16]
|
||||
139 P31[17-17]
|
||||
140 P31[18-18]
|
||||
141 P31[19-19]
|
||||
142 P31[20-20]
|
||||
143 P31[21-21]
|
||||
144 P31[22-22]
|
||||
145 P31[23-23]
|
||||
146 P31[24-24]
|
||||
147 P31[25-25]
|
||||
148 P31[26-26]
|
||||
149 P31[27-27]
|
||||
150 P31[28-28]
|
||||
151 P31[29-29]
|
||||
152 P31[30-30]
|
||||
153 P31[31-31]
|
||||
154 P31[32-32]
|
||||
155 P31[33-33]
|
||||
156 P31[34-34]
|
||||
157 P31[35-35]
|
||||
158 P31[36-36]
|
||||
159 P31[37-37]
|
||||
160 P31[38-38]
|
||||
161 P31[39-39]
|
||||
162 P31[40-40]
|
||||
163 P31[41-41]
|
||||
164 P31[42-42]
|
||||
165 P31[43-43]
|
||||
166 P31[44-44]
|
||||
167 P31[45-45]
|
||||
168 P31[46-46]
|
||||
169 P31[47-47]
|
||||
170 P31[48-48]
|
||||
171 P31[49-49]
|
||||
172 P31[50-50]
|
||||
173 P31[51-51]
|
||||
174 P31[52-52]
|
||||
175 P31[53-53]
|
||||
176 P31[54-54]
|
||||
177 P31[55-55]
|
||||
178 P31[56-56]
|
||||
179 P31[57-57]
|
||||
180 P31[58-58]
|
||||
181 P31[59-59]
|
||||
182 P31[60-60]
|
||||
183 P31[61-61]
|
||||
184 P31[62-62]
|
||||
185 P31[63-63]
|
||||
186 P31[64-64]
|
||||
187 P31[65-65]
|
||||
188 P31[66-66]
|
||||
189 P31[67-67]
|
||||
190 P31[68-68]
|
||||
191 P31[69-69]
|
||||
192 P463[26-26]
|
||||
193 P463[27-27]
|
||||
194 P463[28-28]
|
||||
195 P463[29-29]
|
||||
196 P463[30-30]
|
||||
197 P463[31-31]
|
||||
198 P463[32-32]
|
||||
199 P463[33-33]
|
||||
200 P463[34-34]
|
||||
201 P463[35-35]
|
||||
202 P463[36-36]
|
||||
203 P463[37-37]
|
||||
204 P463[38-38]
|
||||
205 P463[39-39]
|
||||
206 P463[40-40]
|
||||
207 P463[41-41]
|
||||
208 P463[42-42]
|
||||
209 P463[43-43]
|
||||
210 P463[44-44]
|
||||
211 P463[45-45]
|
||||
212 P463[46-46]
|
||||
213 P463[47-47]
|
||||
214 P463[48-48]
|
||||
215 P463[49-49]
|
||||
216 P463[50-50]
|
||||
217 P463[51-51]
|
||||
218 P463[52-52]
|
||||
219 P463[53-53]
|
||||
220 P463[54-54]
|
||||
221 P463[55-55]
|
||||
222 P463[56-56]
|
||||
223 P463[57-57]
|
||||
224 P463[58-58]
|
||||
225 P463[59-59]
|
||||
226 P463[60-60]
|
||||
227 P463[61-61]
|
||||
228 P463[62-62]
|
||||
229 P463[63-63]
|
||||
230 P463[64-64]
|
||||
231 P463[65-65]
|
||||
232 P463[66-66]
|
||||
233 P463[67-67]
|
||||
234 P463[68-68]
|
||||
235 P463[69-69]
|
||||
236 P512[4-69]
|
||||
237 P190[0-29]
|
||||
238 P150[0-3]
|
||||
239 P1376[39-47]
|
||||
240 P463[0-7]
|
||||
241 P166[0-7]
|
||||
242 P2962[18-30]
|
||||
243 P108[29-36]
|
||||
244 P39[0-3]
|
||||
245 P17[47-48]
|
||||
246 P166[21-23]
|
||||
247 P793[46-69]
|
||||
248 P69[32-41]
|
||||
249 P17[57-58]
|
||||
250 P190[42-45]
|
||||
251 P2962[39-42]
|
||||
252 P54[0-18]
|
||||
253 P26[56-61]
|
||||
254 P150[14-17]
|
||||
255 P463[16-17]
|
||||
256 P26[39-46]
|
||||
257 P579[36-43]
|
||||
258 P579[16-23]
|
||||
259 P2962[59-60]
|
||||
260 P1411[59-61]
|
||||
261 P26[20-27]
|
||||
262 P6[4-69]
|
||||
263 P1435[33-34]
|
||||
264 P166[52-53]
|
||||
265 P108[49-57]
|
||||
266 P150[10-13]
|
||||
267 P1346[47-68]
|
||||
268 P150[18-21]
|
||||
269 P1346[13-46]
|
||||
270 P69[20-23]
|
||||
271 P39[31-32]
|
||||
272 P1411[32-37]
|
||||
273 P166[62-63]
|
||||
274 P150[44-47]
|
||||
275 P2962[61-62]
|
||||
276 P150[48-51]
|
||||
277 P150[52-55]
|
||||
278 P1411[62-67]
|
||||
279 P1435[35-36]
|
||||
280 P1411[48-51]
|
||||
281 P150[22-25]
|
||||
282 P2962[63-64]
|
||||
283 P2962[65-66]
|
||||
284 P166[58-59]
|
||||
285 P190[46-49]
|
||||
286 P54[34-35]
|
||||
287 P1435[4-16]
|
||||
288 P463[18-19]
|
||||
289 P150[31-34]
|
||||
290 P150[35-38]
|
||||
291 P39[35-36]
|
||||
292 P26[62-69]
|
||||
293 P1411[56-58]
|
||||
294 P1435[37-38]
|
||||
295 P166[60-61]
|
||||
296 P39[33-34]
|
||||
297 P102[24-31]
|
||||
298 P2962[43-46]
|
||||
299 P108[37-48]
|
||||
300 P190[50-53]
|
||||
301 P39[4-6]
|
||||
302 P1435[39-40]
|
||||
303 P793[0-45]
|
||||
304 P150[64-69]
|
||||
305 P39[19-22]
|
||||
306 P27[30-38]
|
||||
307 P2962[31-38]
|
||||
308 P1411[24-31]
|
||||
309 P102[40-45]
|
||||
310 P39[37-38]
|
||||
311 P463[8-11]
|
||||
312 P1435[41-42]
|
||||
313 P27[52-59]
|
||||
314 P69[16-19]
|
||||
315 P17[16-18]
|
||||
316 P190[54-57]
|
||||
317 P1435[43-44]
|
||||
318 P166[8-15]
|
||||
319 P166[45-47]
|
||||
320 P2962[47-50]
|
||||
321 P39[39-40]
|
||||
322 P1411[52-55]
|
||||
323 P108[58-69]
|
||||
324 P463[20-21]
|
||||
325 P39[41-42]
|
||||
326 P150[26-30]
|
||||
327 P150[39-43]
|
||||
328 P1435[45-46]
|
||||
329 P26[28-38]
|
||||
330 P54[27-30]
|
||||
331 P190[58-61]
|
||||
332 P17[59-61]
|
||||
333 P54[36-37]
|
||||
334 P166[16-20]
|
||||
335 P166[37-40]
|
||||
336 P1435[47-48]
|
||||
337 P17[0-3]
|
||||
338 P26[47-55]
|
||||
339 P1435[49-50]
|
||||
340 P1435[25-28]
|
||||
341 P150[4-9]
|
||||
342 P102[63-69]
|
||||
343 P26[0-19]
|
||||
344 P1435[17-24]
|
||||
345 P39[23-26]
|
||||
346 P1435[51-52]
|
||||
347 P39[7-11]
|
||||
348 P69[12-15]
|
||||
349 P69[24-31]
|
||||
350 P102[0-23]
|
||||
351 P39[43-44]
|
||||
352 P579[24-35]
|
||||
353 P190[62-65]
|
||||
354 P1435[53-54]
|
||||
355 P1376[0-18]
|
||||
356 P27[0-14]
|
||||
357 P463[12-15]
|
||||
358 P166[33-36]
|
||||
359 P102[32-39]
|
||||
360 P17[4-7]
|
||||
361 P190[30-41]
|
||||
362 P166[24-28]
|
||||
363 P190[66-69]
|
||||
364 P69[42-69]
|
||||
365 P1435[55-56]
|
||||
366 P54[31-33]
|
||||
367 P39[45-46]
|
||||
368 P17[12-15]
|
||||
369 P1435[57-58]
|
||||
370 P54[19-26]
|
||||
371 P2962[51-54]
|
||||
372 P2962[67-69]
|
||||
373 P1435[59-60]
|
||||
374 P579[44-56]
|
||||
375 P1435[61-62]
|
||||
376 P166[41-44]
|
||||
377 P17[19-22]
|
||||
378 P1376[19-38]
|
||||
379 P17[23-26]
|
||||
380 P1376[48-69]
|
||||
381 P463[22-23]
|
||||
382 P17[27-30]
|
||||
383 P1435[63-64]
|
||||
384 P69[0-3]
|
||||
385 P1435[66-67]
|
||||
386 P17[35-38]
|
||||
387 P69[8-11]
|
||||
388 P1435[68-69]
|
||||
389 P17[31-34]
|
||||
390 P102[46-53]
|
||||
391 P27[60-69]
|
||||
392 P579[57-69]
|
||||
393 P69[4-7]
|
||||
394 P1411[7-14]
|
||||
395 P551[0-35]
|
||||
396 P108[0-28]
|
||||
397 P17[8-11]
|
||||
398 P1411[38-47]
|
||||
399 P17[43-46]
|
||||
400 P17[49-52]
|
||||
401 P166[64-69]
|
||||
402 P1435[29-32]
|
||||
403 P54[38-39]
|
||||
404 P39[27-30]
|
||||
405 P2962[55-58]
|
||||
406 P463[24-25]
|
||||
407 P17[39-42]
|
||||
408 P17[53-56]
|
||||
409 P17[66-69]
|
||||
410 P17[62-65]
|
||||
411 P1411[15-23]
|
||||
412 P166[48-51]
|
||||
413 P27[15-29]
|
||||
414 P150[56-63]
|
||||
415 P27[39-51]
|
||||
416 P39[47-48]
|
||||
417 P166[29-32]
|
||||
418 P39[12-18]
|
||||
419 P166[54-57]
|
||||
420 P551[36-69]
|
||||
421 P579[0-15]
|
||||
422 P102[54-62]
|
19271
data/wikidata12k/test.txt
Normal file
File diff suppressed because it is too large
Load Diff
71
data/wikidata12k/time_map.dict
Normal file
@ -0,0 +1,71 @@
|
||||
0 19 19
|
||||
1 20 1643
|
||||
2 1644 1790
|
||||
3 1791 1816
|
||||
4 1817 1855
|
||||
5 1856 1871
|
||||
6 1872 1893
|
||||
7 1894 1905
|
||||
8 1906 1913
|
||||
9 1914 1918
|
||||
10 1919 1920
|
||||
11 1921 1924
|
||||
12 1925 1929
|
||||
13 1930 1933
|
||||
14 1934 1937
|
||||
15 1938 1941
|
||||
16 1942 1945
|
||||
17 1946 1948
|
||||
18 1949 1950
|
||||
19 1951 1953
|
||||
20 1954 1956
|
||||
21 1957 1959
|
||||
22 1960 1961
|
||||
23 1962 1963
|
||||
24 1964 1965
|
||||
25 1966 1967
|
||||
26 1968 1968
|
||||
27 1969 1970
|
||||
28 1971 1972
|
||||
29 1973 1974
|
||||
30 1975 1976
|
||||
31 1977 1978
|
||||
32 1979 1980
|
||||
33 1981 1982
|
||||
34 1983 1983
|
||||
35 1984 1984
|
||||
36 1985 1985
|
||||
37 1986 1986
|
||||
38 1987 1987
|
||||
39 1988 1988
|
||||
40 1989 1989
|
||||
41 1990 1990
|
||||
42 1991 1991
|
||||
43 1992 1992
|
||||
44 1993 1993
|
||||
45 1994 1994
|
||||
46 1995 1995
|
||||
47 1996 1996
|
||||
48 1997 1997
|
||||
49 1998 1998
|
||||
50 1999 1999
|
||||
51 2000 2000
|
||||
52 2001 2001
|
||||
53 2002 2002
|
||||
54 2003 2003
|
||||
55 2004 2004
|
||||
56 2005 2005
|
||||
57 2006 2006
|
||||
58 2007 2007
|
||||
59 2008 2008
|
||||
60 2009 2009
|
||||
61 2010 2010
|
||||
62 2011 2011
|
||||
63 2012 2012
|
||||
64 2013 2013
|
||||
65 2014 2014
|
||||
66 2015 2015
|
||||
67 2016 2016
|
||||
68 2017 2017
|
||||
69 2018 2020
|
||||
70 2021 2021
|
252339
data/wikidata12k/train.txt
Normal file
File diff suppressed because it is too large
Load Diff
20208
data/wikidata12k/valid.txt
Normal file
File diff suppressed because it is too large
Load Diff
15
data/wikidata12k_both/about.txt
Normal file
@ -0,0 +1,15 @@
|
||||
# triples: 231529
|
||||
# entities: 12554
|
||||
# relations: 423
|
||||
# timesteps: 70
|
||||
# test triples: 16195
|
||||
# valid triples: 16707
|
||||
# train triples: 198627
|
||||
Measure method: N/A
|
||||
Target Size : 423
|
||||
Grow Factor: 0
|
||||
Shrink Factor: 4.0
|
||||
Epsilon Factor: 0
|
||||
Search method: N/A
|
||||
filter_dupes: both
|
||||
nonames: False
|
40621
data/wikidata12k_both/complete_type.txt
Normal file
File diff suppressed because it is too large
Load Diff
12554
data/wikidata12k_both/entities.dict
Normal file
File diff suppressed because it is too large
Load Diff
423
data/wikidata12k_both/relations.dict
Normal file
@ -0,0 +1,423 @@
|
||||
0 P131[0-0]
|
||||
1 P131[1-1]
|
||||
2 P131[2-2]
|
||||
3 P131[3-3]
|
||||
4 P131[4-4]
|
||||
5 P131[5-5]
|
||||
6 P131[6-6]
|
||||
7 P131[7-7]
|
||||
8 P131[8-8]
|
||||
9 P131[9-9]
|
||||
10 P131[10-10]
|
||||
11 P131[11-11]
|
||||
12 P131[12-12]
|
||||
13 P131[13-13]
|
||||
14 P131[14-14]
|
||||
15 P131[15-15]
|
||||
16 P131[16-16]
|
||||
17 P131[17-17]
|
||||
18 P131[18-18]
|
||||
19 P131[19-19]
|
||||
20 P131[20-20]
|
||||
21 P131[21-21]
|
||||
22 P131[22-22]
|
||||
23 P131[23-23]
|
||||
24 P131[24-24]
|
||||
25 P131[25-25]
|
||||
26 P131[26-26]
|
||||
27 P131[27-27]
|
||||
28 P131[28-28]
|
||||
29 P131[29-29]
|
||||
30 P131[30-30]
|
||||
31 P131[31-31]
|
||||
32 P131[32-32]
|
||||
33 P131[33-33]
|
||||
34 P131[34-34]
|
||||
35 P131[35-35]
|
||||
36 P131[36-36]
|
||||
37 P131[37-37]
|
||||
38 P131[38-38]
|
||||
39 P131[39-39]
|
||||
40 P131[40-40]
|
||||
41 P131[41-41]
|
||||
42 P131[42-42]
|
||||
43 P131[43-43]
|
||||
44 P131[44-44]
|
||||
45 P131[45-45]
|
||||
46 P131[46-46]
|
||||
47 P131[47-47]
|
||||
48 P131[48-48]
|
||||
49 P131[49-49]
|
||||
50 P131[50-50]
|
||||
51 P131[51-51]
|
||||
52 P131[52-52]
|
||||
53 P131[53-53]
|
||||
54 P131[54-54]
|
||||
55 P131[55-55]
|
||||
56 P131[56-56]
|
||||
57 P131[57-57]
|
||||
58 P131[58-58]
|
||||
59 P131[59-59]
|
||||
60 P131[60-60]
|
||||
61 P131[61-61]
|
||||
62 P131[62-62]
|
||||
63 P131[63-63]
|
||||
64 P131[64-64]
|
||||
65 P131[65-65]
|
||||
66 P131[66-66]
|
||||
67 P131[67-67]
|
||||
68 P131[68-68]
|
||||
69 P131[69-69]
|
||||
70 P1435[65-65]
|
||||
71 P39[49-49]
|
||||
72 P39[50-50]
|
||||
73 P39[51-51]
|
||||
74 P39[52-52]
|
||||
75 P39[53-53]
|
||||
76 P39[54-54]
|
||||
77 P39[55-55]
|
||||
78 P39[56-56]
|
||||
79 P39[57-57]
|
||||
80 P39[58-58]
|
||||
81 P39[59-59]
|
||||
82 P39[60-60]
|
||||
83 P39[61-61]
|
||||
84 P39[62-62]
|
||||
85 P39[63-63]
|
||||
86 P39[64-64]
|
||||
87 P39[65-65]
|
||||
88 P39[66-66]
|
||||
89 P39[67-67]
|
||||
90 P39[68-68]
|
||||
91 P39[69-69]
|
||||
92 P54[40-40]
|
||||
93 P54[41-41]
|
||||
94 P54[42-42]
|
||||
95 P54[43-43]
|
||||
96 P54[44-44]
|
||||
97 P54[45-45]
|
||||
98 P54[46-46]
|
||||
99 P54[47-47]
|
||||
100 P54[48-48]
|
||||
101 P54[49-49]
|
||||
102 P54[50-50]
|
||||
103 P54[51-51]
|
||||
104 P54[52-52]
|
||||
105 P54[53-53]
|
||||
106 P54[54-54]
|
||||
107 P54[55-55]
|
||||
108 P54[56-56]
|
||||
109 P54[57-57]
|
||||
110 P54[58-58]
|
||||
111 P54[59-59]
|
||||
112 P54[60-60]
|
||||
113 P54[61-61]
|
||||
114 P54[62-62]
|
||||
115 P54[63-63]
|
||||
116 P54[64-64]
|
||||
117 P54[65-65]
|
||||
118 P54[66-66]
|
||||
119 P54[67-67]
|
||||
120 P54[68-68]
|
||||
121 P54[69-69]
|
||||
122 P31[0-0]
|
||||
123 P31[1-1]
|
||||
124 P31[2-2]
|
||||
125 P31[3-3]
|
||||
126 P31[4-4]
|
||||
127 P31[5-5]
|
||||
128 P31[6-6]
|
||||
129 P31[7-7]
|
||||
130 P31[8-8]
|
||||
131 P31[9-9]
|
||||
132 P31[10-10]
|
||||
133 P31[11-11]
|
||||
134 P31[12-12]
|
||||
135 P31[13-13]
|
||||
136 P31[14-14]
|
||||
137 P31[15-15]
|
||||
138 P31[16-16]
|
||||
139 P31[17-17]
|
||||
140 P31[18-18]
|
||||
141 P31[19-19]
|
||||
142 P31[20-20]
|
||||
143 P31[21-21]
|
||||
144 P31[22-22]
|
||||
145 P31[23-23]
|
||||
146 P31[24-24]
|
||||
147 P31[25-25]
|
||||
148 P31[26-26]
|
||||
149 P31[27-27]
|
||||
150 P31[28-28]
|
||||
151 P31[29-29]
|
||||
152 P31[30-30]
|
||||
153 P31[31-31]
|
||||
154 P31[32-32]
|
||||
155 P31[33-33]
|
||||
156 P31[34-34]
|
||||
157 P31[35-35]
|
||||
158 P31[36-36]
|
||||
159 P31[37-37]
|
||||
160 P31[38-38]
|
||||
161 P31[39-39]
|
||||
162 P31[40-40]
|
||||
163 P31[41-41]
|
||||
164 P31[42-42]
|
||||
165 P31[43-43]
|
||||
166 P31[44-44]
|
||||
167 P31[45-45]
|
||||
168 P31[46-46]
|
||||
169 P31[47-47]
|
||||
170 P31[48-48]
|
||||
171 P31[49-49]
|
||||
172 P31[50-50]
|
||||
173 P31[51-51]
|
||||
174 P31[52-52]
|
||||
175 P31[53-53]
|
||||
176 P31[54-54]
|
||||
177 P31[55-55]
|
||||
178 P31[56-56]
|
||||
179 P31[57-57]
|
||||
180 P31[58-58]
|
||||
181 P31[59-59]
|
||||
182 P31[60-60]
|
||||
183 P31[61-61]
|
||||
184 P31[62-62]
|
||||
185 P31[63-63]
|
||||
186 P31[64-64]
|
||||
187 P31[65-65]
|
||||
188 P31[66-66]
|
||||
189 P31[67-67]
|
||||
190 P31[68-68]
|
||||
191 P31[69-69]
|
||||
192 P463[26-26]
|
||||
193 P463[27-27]
|
||||
194 P463[28-28]
|
||||
195 P463[29-29]
|
||||
196 P463[30-30]
|
||||
197 P463[31-31]
|
||||
198 P463[32-32]
|
||||
199 P463[33-33]
|
||||
200 P463[34-34]
|
||||
201 P463[35-35]
|
||||
202 P463[36-36]
|
||||
203 P463[37-37]
|
||||
204 P463[38-38]
|
||||
205 P463[39-39]
|
||||
206 P463[40-40]
|
||||
207 P463[41-41]
|
||||
208 P463[42-42]
|
||||
209 P463[43-43]
|
||||
210 P463[44-44]
|
||||
211 P463[45-45]
|
||||
212 P463[46-46]
|
||||
213 P463[47-47]
|
||||
214 P463[48-48]
|
||||
215 P463[49-49]
|
||||
216 P463[50-50]
|
||||
217 P463[51-51]
|
||||
218 P463[52-52]
|
||||
219 P463[53-53]
|
||||
220 P463[54-54]
|
||||
221 P463[55-55]
|
||||
222 P463[56-56]
|
||||
223 P463[57-57]
|
||||
224 P463[58-58]
|
||||
225 P463[59-59]
|
||||
226 P463[60-60]
|
||||
227 P463[61-61]
|
||||
228 P463[62-62]
|
||||
229 P463[63-63]
|
||||
230 P463[64-64]
|
||||
231 P463[65-65]
|
||||
232 P463[66-66]
|
||||
233 P463[67-67]
|
||||
234 P463[68-68]
|
||||
235 P463[69-69]
|
||||
236 P512[4-69]
|
||||
237 P190[0-29]
|
||||
238 P150[0-3]
|
||||
239 P1376[39-47]
|
||||
240 P463[0-7]
|
||||
241 P166[0-7]
|
||||
242 P2962[18-30]
|
||||
243 P108[29-36]
|
||||
244 P39[0-3]
|
||||
245 P17[47-48]
|
||||
246 P166[21-23]
|
||||
247 P793[46-69]
|
||||
248 P69[32-41]
|
||||
249 P17[57-58]
|
||||
250 P190[42-45]
|
||||
251 P2962[39-42]
|
||||
252 P54[0-18]
|
||||
253 P26[56-61]
|
||||
254 P150[14-17]
|
||||
255 P463[16-17]
|
||||
256 P26[39-46]
|
||||
257 P579[36-43]
|
||||
258 P579[16-23]
|
||||
259 P2962[59-60]
|
||||
260 P1411[59-61]
|
||||
261 P26[20-27]
|
||||
262 P6[4-69]
|
||||
263 P1435[33-34]
|
||||
264 P166[52-53]
|
||||
265 P108[49-57]
|
||||
266 P150[10-13]
|
||||
267 P1346[47-68]
|
||||
268 P150[18-21]
|
||||
269 P1346[13-46]
|
||||
270 P69[20-23]
|
||||
271 P39[31-32]
|
||||
272 P1411[32-37]
|
||||
273 P166[62-63]
|
||||
274 P150[44-47]
|
||||
275 P2962[61-62]
|
||||
276 P150[48-51]
|
||||
277 P150[52-55]
|
||||
278 P1411[62-67]
|
||||
279 P1435[35-36]
|
||||
280 P1411[48-51]
|
||||
281 P150[22-25]
|
||||
282 P2962[63-64]
|
||||
283 P2962[65-66]
|
||||
284 P166[58-59]
|
||||
285 P190[46-49]
|
||||
286 P54[34-35]
|
||||
287 P1435[4-16]
|
||||
288 P463[18-19]
|
||||
289 P150[31-34]
|
||||
290 P150[35-38]
|
||||
291 P39[35-36]
|
||||
292 P26[62-69]
|
||||
293 P1411[56-58]
|
||||
294 P1435[37-38]
|
||||
295 P166[60-61]
|
||||
296 P39[33-34]
|
||||
297 P102[24-31]
|
||||
298 P2962[43-46]
|
||||
299 P108[37-48]
|
||||
300 P190[50-53]
|
||||
301 P39[4-6]
|
||||
302 P1435[39-40]
|
||||
303 P793[0-45]
|
||||
304 P150[64-69]
|
||||
305 P39[19-22]
|
||||
306 P27[30-38]
|
||||
307 P2962[31-38]
|
||||
308 P1411[24-31]
|
||||
309 P102[40-45]
|
||||
310 P39[37-38]
|
||||
311 P463[8-11]
|
||||
312 P1435[41-42]
|
||||
313 P27[52-59]
|
||||
314 P69[16-19]
|
||||
315 P17[16-18]
|
||||
316 P190[54-57]
|
||||
317 P1435[43-44]
|
||||
318 P166[8-15]
|
||||
319 P166[45-47]
|
||||
320 P2962[47-50]
|
||||
321 P39[39-40]
|
||||
322 P1411[52-55]
|
||||
323 P108[58-69]
|
||||
324 P463[20-21]
|
||||
325 P39[41-42]
|
||||
326 P150[26-30]
|
||||
327 P150[39-43]
|
||||
328 P1435[45-46]
|
||||
329 P26[28-38]
|
||||
330 P54[27-30]
|
||||
331 P190[58-61]
|
||||
332 P17[59-61]
|
||||
333 P54[36-37]
|
||||
334 P166[16-20]
|
||||
335 P166[37-40]
|
||||
336 P1435[47-48]
|
||||
337 P17[0-3]
|
||||
338 P26[47-55]
|
||||
339 P1435[49-50]
|
||||
340 P1435[25-28]
|
||||
341 P150[4-9]
|
||||
342 P102[63-69]
|
||||
343 P26[0-19]
|
||||
344 P1435[17-24]
|
||||
345 P39[23-26]
|
||||
346 P1435[51-52]
|
||||
347 P39[7-11]
|
||||
348 P69[12-15]
|
||||
349 P69[24-31]
|
||||
350 P102[0-23]
|
||||
351 P39[43-44]
|
||||
352 P579[24-35]
|
||||
353 P190[62-65]
|
||||
354 P1435[53-54]
|
||||
355 P1376[0-18]
|
||||
356 P27[0-14]
|
||||
357 P463[12-15]
|
||||
358 P166[33-36]
|
||||
359 P102[32-39]
|
||||
360 P17[4-7]
|
||||
361 P190[30-41]
|
||||
362 P166[24-28]
|
||||
363 P190[66-69]
|
||||
364 P69[42-69]
|
||||
365 P1435[55-56]
|
||||
366 P54[31-33]
|
||||
367 P39[45-46]
|
||||
368 P17[12-15]
|
||||
369 P1435[57-58]
|
||||
370 P54[19-26]
|
||||
371 P2962[51-54]
|
||||
372 P2962[67-69]
|
||||
373 P1435[59-60]
|
||||
374 P579[44-56]
|
||||
375 P1435[61-62]
|
||||
376 P166[41-44]
|
||||
377 P17[19-22]
|
||||
378 P1376[19-38]
|
||||
379 P17[23-26]
|
||||
380 P1376[48-69]
|
||||
381 P463[22-23]
|
||||
382 P17[27-30]
|
||||
383 P1435[63-64]
|
||||
384 P69[0-3]
|
||||
385 P1435[66-67]
|
||||
386 P17[35-38]
|
||||
387 P69[8-11]
|
||||
388 P1435[68-69]
|
||||
389 P17[31-34]
|
||||
390 P102[46-53]
|
||||
391 P27[60-69]
|
||||
392 P579[57-69]
|
||||
393 P69[4-7]
|
||||
394 P1411[7-14]
|
||||
395 P551[0-35]
|
||||
396 P108[0-28]
|
||||
397 P17[8-11]
|
||||
398 P1411[38-47]
|
||||
399 P17[43-46]
|
||||
400 P17[49-52]
|
||||
401 P166[64-69]
|
||||
402 P1435[29-32]
|
||||
403 P54[38-39]
|
||||
404 P39[27-30]
|
||||
405 P2962[55-58]
|
||||
406 P463[24-25]
|
||||
407 P17[39-42]
|
||||
408 P17[53-56]
|
||||
409 P17[66-69]
|
||||
410 P17[62-65]
|
||||
411 P1411[15-23]
|
||||
412 P166[48-51]
|
||||
413 P27[15-29]
|
||||
414 P150[56-63]
|
||||
415 P27[39-51]
|
||||
416 P39[47-48]
|
||||
417 P166[29-32]
|
||||
418 P39[12-18]
|
||||
419 P166[54-57]
|
||||
420 P551[36-69]
|
||||
421 P579[0-15]
|
||||
422 P102[54-62]
|
16195
data/wikidata12k_both/test.txt
Normal file
File diff suppressed because it is too large
Load Diff
4062
data/wikidata12k_both/test_type.txt
Normal file
File diff suppressed because it is too large
Load Diff
71
data/wikidata12k_both/time_map.dict
Normal file
@ -0,0 +1,71 @@
|
||||
0 19 19
|
||||
1 20 1643
|
||||
2 1644 1790
|
||||
3 1791 1816
|
||||
4 1817 1855
|
||||
5 1856 1871
|
||||
6 1872 1893
|
||||
7 1894 1905
|
||||
8 1906 1913
|
||||
9 1914 1918
|
||||
10 1919 1920
|
||||
11 1921 1924
|
||||
12 1925 1929
|
||||
13 1930 1933
|
||||
14 1934 1937
|
||||
15 1938 1941
|
||||
16 1942 1945
|
||||
17 1946 1948
|
||||
18 1949 1950
|
||||
19 1951 1953
|
||||
20 1954 1956
|
||||
21 1957 1959
|
||||
22 1960 1961
|
||||
23 1962 1963
|
||||
24 1964 1965
|
||||
25 1966 1967
|
||||
26 1968 1968
|
||||
27 1969 1970
|
||||
28 1971 1972
|
||||
29 1973 1974
|
||||
30 1975 1976
|
||||
31 1977 1978
|
||||
32 1979 1980
|
||||
33 1981 1982
|
||||
34 1983 1983
|
||||
35 1984 1984
|
||||
36 1985 1985
|
||||
37 1986 1986
|
||||
38 1987 1987
|
||||
39 1988 1988
|
||||
40 1989 1989
|
||||
41 1990 1990
|
||||
42 1991 1991
|
||||
43 1992 1992
|
||||
44 1993 1993
|
||||
45 1994 1994
|
||||
46 1995 1995
|
||||
47 1996 1996
|
||||
48 1997 1997
|
||||
49 1998 1998
|
||||
50 1999 1999
|
||||
51 2000 2000
|
||||
52 2001 2001
|
||||
53 2002 2002
|
||||
54 2003 2003
|
||||
55 2004 2004
|
||||
56 2005 2005
|
||||
57 2006 2006
|
||||
58 2007 2007
|
||||
59 2008 2008
|
||||
60 2009 2009
|
||||
61 2010 2010
|
||||
62 2011 2011
|
||||
63 2012 2012
|
||||
64 2013 2013
|
||||
65 2014 2014
|
||||
66 2015 2015
|
||||
67 2016 2016
|
||||
68 2017 2017
|
||||
69 2018 2020
|
||||
70 2021 2021
|
198627
data/wikidata12k_both/train.txt
Normal file
File diff suppressed because it is too large
Load Diff
32497
data/wikidata12k_both/train_type.txt
Normal file
File diff suppressed because it is too large
16707
data/wikidata12k_both/valid.txt
Normal file
File diff suppressed because it is too large
15
data/wikidata12k_old/about.txt
Normal file
@ -0,0 +1,15 @@
# triples: 291818
# entities: 12554
# relations: 423
# timesteps: 70
# test triples: 19271
# valid triples: 20208
# train triples: 252339
Measure method: N/A
Target Size : 423
Grow Factor: 0
Shrink Factor: 4.0
Epsilon Factor: 0
Search method: N/A
filter_dupes: inter
nonames: False
12554
data/wikidata12k_old/entities.dict
Normal file
File diff suppressed because it is too large
1820
data/wikidata12k_old/indices_test.txt
Normal file
File diff suppressed because it is too large
13036
data/wikidata12k_old/indices_train.txt
Normal file
File diff suppressed because it is too large
1796
data/wikidata12k_old/indices_valid.txt
Normal file
File diff suppressed because it is too large
12554
data/wikidata12k_old/raw_entity2id.txt
Normal file
File diff suppressed because it is too large
24
data/wikidata12k_old/raw_rel2id.txt
Normal file
@ -0,0 +1,24 @@
P1376 0
P512 4
P579 3
P150 18
P190 5
P551 19
P131 1
P793 21
P1435 13
P39 14
P17 6
P54 22
P31 15
P6 7
P1411 20
P2962 2
P463 9
P1346 16
P108 10
P69 23
P166 17
P102 11
P27 12
P26 8
4062
data/wikidata12k_old/raw_test.txt
Normal file
File diff suppressed because it is too large
423
data/wikidata12k_old/relations.dict
Normal file
@ -0,0 +1,423 @@
0 P131[0-0]
1 P131[1-1]
2 P131[2-2]
3 P131[3-3]
4 P131[4-4]
5 P131[5-5]
6 P131[6-6]
7 P131[7-7]
8 P131[8-8]
9 P131[9-9]
10 P131[10-10]
11 P131[11-11]
12 P131[12-12]
13 P131[13-13]
14 P131[14-14]
15 P131[15-15]
16 P131[16-16]
17 P131[17-17]
18 P131[18-18]
19 P131[19-19]
20 P131[20-20]
21 P131[21-21]
22 P131[22-22]
23 P131[23-23]
24 P131[24-24]
25 P131[25-25]
26 P131[26-26]
27 P131[27-27]
28 P131[28-28]
29 P131[29-29]
30 P131[30-30]
31 P131[31-31]
32 P131[32-32]
33 P131[33-33]
34 P131[34-34]
35 P131[35-35]
36 P131[36-36]
37 P131[37-37]
38 P131[38-38]
39 P131[39-39]
40 P131[40-40]
41 P131[41-41]
42 P131[42-42]
43 P131[43-43]
44 P131[44-44]
45 P131[45-45]
46 P131[46-46]
47 P131[47-47]
48 P131[48-48]
49 P131[49-49]
50 P131[50-50]
51 P131[51-51]
52 P131[52-52]
53 P131[53-53]
54 P131[54-54]
55 P131[55-55]
56 P131[56-56]
57 P131[57-57]
58 P131[58-58]
59 P131[59-59]
60 P131[60-60]
61 P131[61-61]
62 P131[62-62]
63 P131[63-63]
64 P131[64-64]
65 P131[65-65]
66 P131[66-66]
67 P131[67-67]
68 P131[68-68]
69 P131[69-69]
70 P1435[65-65]
71 P39[49-49]
72 P39[50-50]
73 P39[51-51]
74 P39[52-52]
75 P39[53-53]
76 P39[54-54]
77 P39[55-55]
78 P39[56-56]
79 P39[57-57]
80 P39[58-58]
81 P39[59-59]
82 P39[60-60]
83 P39[61-61]
84 P39[62-62]
85 P39[63-63]
86 P39[64-64]
87 P39[65-65]
88 P39[66-66]
89 P39[67-67]
90 P39[68-68]
91 P39[69-69]
92 P54[40-40]
93 P54[41-41]
94 P54[42-42]
95 P54[43-43]
96 P54[44-44]
97 P54[45-45]
98 P54[46-46]
99 P54[47-47]
100 P54[48-48]
101 P54[49-49]
102 P54[50-50]
103 P54[51-51]
104 P54[52-52]
105 P54[53-53]
106 P54[54-54]
107 P54[55-55]
108 P54[56-56]
109 P54[57-57]
110 P54[58-58]
111 P54[59-59]
112 P54[60-60]
113 P54[61-61]
114 P54[62-62]
115 P54[63-63]
116 P54[64-64]
117 P54[65-65]
118 P54[66-66]
119 P54[67-67]
120 P54[68-68]
121 P54[69-69]
122 P31[0-0]
123 P31[1-1]
124 P31[2-2]
125 P31[3-3]
126 P31[4-4]
127 P31[5-5]
128 P31[6-6]
129 P31[7-7]
130 P31[8-8]
131 P31[9-9]
132 P31[10-10]
133 P31[11-11]
134 P31[12-12]
135 P31[13-13]
136 P31[14-14]
137 P31[15-15]
138 P31[16-16]
139 P31[17-17]
140 P31[18-18]
141 P31[19-19]
142 P31[20-20]
143 P31[21-21]
144 P31[22-22]
145 P31[23-23]
146 P31[24-24]
147 P31[25-25]
148 P31[26-26]
149 P31[27-27]
150 P31[28-28]
151 P31[29-29]
152 P31[30-30]
153 P31[31-31]
154 P31[32-32]
155 P31[33-33]
156 P31[34-34]
157 P31[35-35]
158 P31[36-36]
159 P31[37-37]
160 P31[38-38]
161 P31[39-39]
162 P31[40-40]
163 P31[41-41]
164 P31[42-42]
165 P31[43-43]
166 P31[44-44]
167 P31[45-45]
168 P31[46-46]
169 P31[47-47]
170 P31[48-48]
171 P31[49-49]
172 P31[50-50]
173 P31[51-51]
174 P31[52-52]
175 P31[53-53]
176 P31[54-54]
177 P31[55-55]
178 P31[56-56]
179 P31[57-57]
180 P31[58-58]
181 P31[59-59]
182 P31[60-60]
183 P31[61-61]
184 P31[62-62]
185 P31[63-63]
186 P31[64-64]
187 P31[65-65]
188 P31[66-66]
189 P31[67-67]
190 P31[68-68]
191 P31[69-69]
192 P463[26-26]
193 P463[27-27]
194 P463[28-28]
195 P463[29-29]
196 P463[30-30]
197 P463[31-31]
198 P463[32-32]
199 P463[33-33]
200 P463[34-34]
201 P463[35-35]
202 P463[36-36]
203 P463[37-37]
204 P463[38-38]
205 P463[39-39]
206 P463[40-40]
207 P463[41-41]
208 P463[42-42]
209 P463[43-43]
210 P463[44-44]
211 P463[45-45]
212 P463[46-46]
213 P463[47-47]
214 P463[48-48]
215 P463[49-49]
216 P463[50-50]
217 P463[51-51]
218 P463[52-52]
219 P463[53-53]
220 P463[54-54]
221 P463[55-55]
222 P463[56-56]
223 P463[57-57]
224 P463[58-58]
225 P463[59-59]
226 P463[60-60]
227 P463[61-61]
228 P463[62-62]
229 P463[63-63]
230 P463[64-64]
231 P463[65-65]
232 P463[66-66]
233 P463[67-67]
234 P463[68-68]
235 P463[69-69]
236 P512[4-69]
237 P190[0-29]
238 P150[0-3]
239 P1376[39-47]
240 P463[0-7]
241 P166[0-7]
242 P2962[18-30]
243 P108[29-36]
244 P39[0-3]
245 P17[47-48]
246 P166[21-23]
247 P793[46-69]
248 P69[32-41]
249 P17[57-58]
250 P190[42-45]
251 P2962[39-42]
252 P54[0-18]
253 P26[56-61]
254 P150[14-17]
255 P463[16-17]
256 P26[39-46]
257 P579[36-43]
258 P579[16-23]
259 P2962[59-60]
260 P1411[59-61]
261 P26[20-27]
262 P6[4-69]
263 P1435[33-34]
264 P166[52-53]
265 P108[49-57]
266 P150[10-13]
267 P1346[47-68]
268 P150[18-21]
269 P1346[13-46]
270 P69[20-23]
271 P39[31-32]
272 P1411[32-37]
273 P166[62-63]
274 P150[44-47]
275 P2962[61-62]
276 P150[48-51]
277 P150[52-55]
278 P1411[62-67]
279 P1435[35-36]
280 P1411[48-51]
281 P150[22-25]
282 P2962[63-64]
283 P2962[65-66]
284 P166[58-59]
285 P190[46-49]
286 P54[34-35]
287 P1435[4-16]
288 P463[18-19]
289 P150[31-34]
290 P150[35-38]
291 P39[35-36]
292 P26[62-69]
293 P1411[56-58]
294 P1435[37-38]
295 P166[60-61]
296 P39[33-34]
297 P102[24-31]
298 P2962[43-46]
299 P108[37-48]
300 P190[50-53]
301 P39[4-6]
302 P1435[39-40]
303 P793[0-45]
304 P150[64-69]
305 P39[19-22]
306 P27[30-38]
307 P2962[31-38]
308 P1411[24-31]
309 P102[40-45]
310 P39[37-38]
311 P463[8-11]
312 P1435[41-42]
313 P27[52-59]
314 P69[16-19]
315 P17[16-18]
316 P190[54-57]
317 P1435[43-44]
318 P166[8-15]
319 P166[45-47]
320 P2962[47-50]
321 P39[39-40]
322 P1411[52-55]
323 P108[58-69]
324 P463[20-21]
325 P39[41-42]
326 P150[26-30]
327 P150[39-43]
328 P1435[45-46]
329 P26[28-38]
330 P54[27-30]
331 P190[58-61]
332 P17[59-61]
333 P54[36-37]
334 P166[16-20]
335 P166[37-40]
336 P1435[47-48]
337 P17[0-3]
338 P26[47-55]
339 P1435[49-50]
340 P1435[25-28]
341 P150[4-9]
342 P102[63-69]
343 P26[0-19]
344 P1435[17-24]
345 P39[23-26]
346 P1435[51-52]
347 P39[7-11]
348 P69[12-15]
349 P69[24-31]
350 P102[0-23]
351 P39[43-44]
352 P579[24-35]
353 P190[62-65]
354 P1435[53-54]
355 P1376[0-18]
356 P27[0-14]
357 P463[12-15]
358 P166[33-36]
359 P102[32-39]
360 P17[4-7]
361 P190[30-41]
362 P166[24-28]
363 P190[66-69]
364 P69[42-69]
365 P1435[55-56]
366 P54[31-33]
367 P39[45-46]
368 P17[12-15]
369 P1435[57-58]
370 P54[19-26]
371 P2962[51-54]
372 P2962[67-69]
373 P1435[59-60]
374 P579[44-56]
375 P1435[61-62]
376 P166[41-44]
377 P17[19-22]
378 P1376[19-38]
379 P17[23-26]
380 P1376[48-69]
381 P463[22-23]
382 P17[27-30]
383 P1435[63-64]
384 P69[0-3]
385 P1435[66-67]
386 P17[35-38]
387 P69[8-11]
388 P1435[68-69]
389 P17[31-34]
390 P102[46-53]
391 P27[60-69]
392 P579[57-69]
393 P69[4-7]
394 P1411[7-14]
395 P551[0-35]
396 P108[0-28]
397 P17[8-11]
398 P1411[38-47]
399 P17[43-46]
400 P17[49-52]
401 P166[64-69]
402 P1435[29-32]
403 P54[38-39]
404 P39[27-30]
405 P2962[55-58]
406 P463[24-25]
407 P17[39-42]
408 P17[53-56]
409 P17[66-69]
410 P17[62-65]
411 P1411[15-23]
412 P166[48-51]
413 P27[15-29]
414 P150[56-63]
415 P27[39-51]
416 P39[47-48]
417 P166[29-32]
418 P39[12-18]
419 P166[54-57]
420 P551[36-69]
421 P579[0-15]
422 P102[54-62]
19271
data/wikidata12k_old/test.txt
Normal file
File diff suppressed because it is too large
71
data/wikidata12k_old/time_map.dict
Normal file
@ -0,0 +1,71 @@
0 19 19
1 20 1643
2 1644 1790
3 1791 1816
4 1817 1855
5 1856 1871
6 1872 1893
7 1894 1905
8 1906 1913
9 1914 1918
10 1919 1920
11 1921 1924
12 1925 1929
13 1930 1933
14 1934 1937
15 1938 1941
16 1942 1945
17 1946 1948
18 1949 1950
19 1951 1953
20 1954 1956
21 1957 1959
22 1960 1961
23 1962 1963
24 1964 1965
25 1966 1967
26 1968 1968
27 1969 1970
28 1971 1972
29 1973 1974
30 1975 1976
31 1977 1978
32 1979 1980
33 1981 1982
34 1983 1983
35 1984 1984
36 1985 1985
37 1986 1986
38 1987 1987
39 1988 1988
40 1989 1989
41 1990 1990
42 1991 1991
43 1992 1992
44 1993 1993
45 1994 1994
46 1995 1995
47 1996 1996
48 1997 1997
49 1998 1998
50 1999 1999
51 2000 2000
52 2001 2001
53 2002 2002
54 2003 2003
55 2004 2004
56 2005 2005
57 2006 2006
58 2007 2007
59 2008 2008
60 2009 2009
61 2010 2010
62 2011 2011
63 2012 2012
64 2013 2013
65 2014 2014
66 2015 2015
67 2016 2016
68 2017 2017
69 2018 2020
70 2021 2021
252339
data/wikidata12k_old/train.txt
Normal file
File diff suppressed because it is too large
20208
data/wikidata12k_old/valid.txt
Normal file
File diff suppressed because it is too large
15
data/yago/about.txt
Normal file
@ -0,0 +1,15 @@
# triples: 78032
# entities: 10526
# relations: 177
# timesteps: 46
# test triples: 6909
# valid triples: 7198
# train triples: 63925
Measure method: N/A
Target Size : 0
Grow Factor: 0
Shrink Factor: 0
Epsilon Factor: 5.0
Search method: N/A
filter_dupes: inter
nonames: False
10526
data/yago/entities.dict
Normal file
File diff suppressed because it is too large
177
data/yago/relations.dict
Normal file
@ -0,0 +1,177 @@
0 <wasBornIn>[0-2]
1 <wasBornIn>[2-5]
2 <wasBornIn>[5-7]
3 <wasBornIn>[7-10]
4 <wasBornIn>[10-12]
5 <wasBornIn>[12-15]
6 <wasBornIn>[15-17]
7 <wasBornIn>[17-20]
8 <wasBornIn>[20-22]
9 <wasBornIn>[22-25]
10 <wasBornIn>[25-27]
11 <wasBornIn>[27-30]
12 <wasBornIn>[30-32]
13 <wasBornIn>[32-35]
14 <wasBornIn>[35-45]
15 <wasBornIn>[52-52]
16 <diedIn>[0-3]
17 <diedIn>[3-5]
18 <diedIn>[5-7]
19 <diedIn>[7-10]
20 <diedIn>[10-12]
21 <diedIn>[12-14]
22 <diedIn>[14-17]
23 <diedIn>[17-19]
24 <diedIn>[19-21]
25 <diedIn>[21-23]
26 <diedIn>[23-25]
27 <diedIn>[25-27]
28 <diedIn>[27-29]
29 <diedIn>[29-32]
30 <diedIn>[32-34]
31 <diedIn>[34-36]
32 <diedIn>[36-38]
33 <diedIn>[38-40]
34 <diedIn>[40-42]
35 <diedIn>[42-44]
36 <diedIn>[44-47]
37 <diedIn>[47-49]
38 <diedIn>[49-51]
39 <diedIn>[51-53]
40 <diedIn>[53-55]
41 <diedIn>[55-57]
42 <diedIn>[59-59]
43 <worksAt>[0-3]
44 <worksAt>[3-5]
45 <worksAt>[5-7]
46 <worksAt>[7-10]
47 <worksAt>[10-12]
48 <worksAt>[12-14]
49 <worksAt>[14-17]
50 <worksAt>[17-19]
51 <worksAt>[19-21]
52 <worksAt>[21-23]
53 <worksAt>[23-25]
54 <worksAt>[25-27]
55 <worksAt>[27-29]
56 <worksAt>[29-32]
57 <worksAt>[32-34]
58 <worksAt>[34-36]
59 <worksAt>[36-40]
60 <worksAt>[40-42]
61 <worksAt>[42-47]
62 <worksAt>[47-53]
63 <worksAt>[59-59]
64 <playsFor>[0-3]
65 <playsFor>[3-5]
66 <playsFor>[5-23]
67 <playsFor>[23-25]
68 <playsFor>[25-27]
69 <playsFor>[27-29]
70 <playsFor>[29-32]
71 <playsFor>[32-34]
72 <playsFor>[34-36]
73 <playsFor>[36-38]
74 <playsFor>[38-40]
75 <playsFor>[40-42]
76 <playsFor>[42-44]
77 <playsFor>[44-47]
78 <playsFor>[47-51]
79 <playsFor>[59-59]
80 <hasWonPrize>[1-4]
81 <hasWonPrize>[4-6]
82 <hasWonPrize>[6-8]
83 <hasWonPrize>[8-11]
84 <hasWonPrize>[11-15]
85 <hasWonPrize>[15-18]
86 <hasWonPrize>[18-22]
87 <hasWonPrize>[22-26]
88 <hasWonPrize>[26-30]
89 <hasWonPrize>[30-33]
90 <hasWonPrize>[33-37]
91 <hasWonPrize>[37-47]
92 <hasWonPrize>[47-53]
93 <hasWonPrize>[59-59]
94 <isMarriedTo>[0-3]
95 <isMarriedTo>[3-5]
96 <isMarriedTo>[5-7]
97 <isMarriedTo>[7-10]
98 <isMarriedTo>[10-12]
99 <isMarriedTo>[12-14]
100 <isMarriedTo>[14-17]
101 <isMarriedTo>[17-19]
102 <isMarriedTo>[19-21]
103 <isMarriedTo>[21-23]
104 <isMarriedTo>[23-25]
105 <isMarriedTo>[25-27]
106 <isMarriedTo>[27-29]
107 <isMarriedTo>[29-32]
108 <isMarriedTo>[32-34]
109 <isMarriedTo>[34-38]
110 <isMarriedTo>[38-42]
111 <isMarriedTo>[42-47]
112 <isMarriedTo>[47-51]
113 <isMarriedTo>[51-55]
114 <isMarriedTo>[59-59]
115 <owns>[0-10]
116 <owns>[10-17]
117 <owns>[17-19]
118 <owns>[19-23]
119 <owns>[23-36]
120 <owns>[36-38]
121 <owns>[59-59]
122 <graduatedFrom>[0-3]
123 <graduatedFrom>[3-5]
124 <graduatedFrom>[5-7]
125 <graduatedFrom>[7-10]
126 <graduatedFrom>[10-14]
127 <graduatedFrom>[14-17]
128 <graduatedFrom>[17-19]
129 <graduatedFrom>[19-21]
130 <graduatedFrom>[21-23]
131 <graduatedFrom>[23-27]
132 <graduatedFrom>[27-32]
133 <graduatedFrom>[32-34]
134 <graduatedFrom>[34-38]
135 <graduatedFrom>[38-42]
136 <graduatedFrom>[59-59]
137 <isAffiliatedTo>[1-4]
138 <isAffiliatedTo>[4-6]
139 <isAffiliatedTo>[6-8]
140 <isAffiliatedTo>[8-11]
141 <isAffiliatedTo>[11-13]
142 <isAffiliatedTo>[13-15]
143 <isAffiliatedTo>[15-18]
144 <isAffiliatedTo>[18-20]
145 <isAffiliatedTo>[20-22]
146 <isAffiliatedTo>[22-24]
147 <isAffiliatedTo>[24-26]
148 <isAffiliatedTo>[26-28]
149 <isAffiliatedTo>[28-30]
150 <isAffiliatedTo>[30-33]
151 <isAffiliatedTo>[33-35]
152 <isAffiliatedTo>[35-37]
153 <isAffiliatedTo>[37-40]
154 <isAffiliatedTo>[40-42]
155 <isAffiliatedTo>[42-44]
156 <isAffiliatedTo>[44-47]
157 <isAffiliatedTo>[47-49]
158 <isAffiliatedTo>[49-51]
159 <isAffiliatedTo>[51-53]
160 <isAffiliatedTo>[53-55]
161 <isAffiliatedTo>[55-57]
162 <isAffiliatedTo>[59-59]
163 <created>[0-3]
164 <created>[3-5]
165 <created>[5-10]
166 <created>[10-12]
167 <created>[12-17]
168 <created>[17-19]
169 <created>[19-25]
170 <created>[25-29]
171 <created>[29-32]
172 <created>[32-36]
173 <created>[36-42]
174 <created>[42-47]
175 <created>[47-53]
176 <created>[59-59]
6909
data/yago/test.txt
Normal file
File diff suppressed because it is too large
Some files were not shown because too many files have changed in this diff