{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convert names and triples to integer ids\n",
    "\n",
    "Assigns 0-based ids (in file order) to the names listed in `entities.txt` and `relations.txt`, then rewrites the tab-separated `(h, r, t)` lines of `train.tsv`, `dev.tsv` and `test.tsv` as space-separated id triples under `./get_neighbor/`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Inputs: one name per line.\n",
    "path1 = './entities.txt'\n",
    "path2 = './relations.txt'\n",
    "# Inputs: tab-separated lines whose first three columns are h, r, t.\n",
    "path3 = './train.tsv'\n",
    "path4 = './dev.tsv'\n",
    "path5 = './test.tsv'\n",
    "# Outputs: name<TAB>id maps.\n",
    "path6 = './get_neighbor/entity2id.txt'\n",
    "path7 = './get_neighbor/relation2id.txt'\n",
    "# Outputs: space-separated id triples.\n",
    "path8 = './get_neighbor/train2id.txt'\n",
    "path9 = './get_neighbor/valid2id.txt'\n",
    "path10 = './get_neighbor/test2id.txt'\n",
    "\n",
    "# Explicit encoding so results do not depend on the platform default.\n",
    "# NOTE(review): assumes the data files are UTF-8 - confirm.\n",
    "ENCODING = 'utf-8'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ensure_parent_dir(path):\n",
    "    \"\"\"Create the directory that will contain ``path`` if it is missing.\"\"\"\n",
    "    parent = os.path.dirname(path)\n",
    "    if parent:\n",
    "        os.makedirs(parent, exist_ok=True)\n",
    "\n",
    "\n",
    "def write_id_map(src_path, dst_path):\n",
    "    \"\"\"Write one ``name<TAB>id`` line per non-blank line of ``src_path``.\n",
    "\n",
    "    Names are the whitespace-stripped lines of ``src_path``; ids are 0-based\n",
    "    and follow the order in which the names appear.\n",
    "    \"\"\"\n",
    "    with open(src_path, 'r', encoding=ENCODING) as f:\n",
    "        names = [line.strip() for line in f]\n",
    "    ensure_parent_dir(dst_path)\n",
    "    with open(dst_path, 'w', encoding=ENCODING) as f:\n",
    "        # Blank lines are dropped: an empty name cannot be read back from the map file.\n",
    "        for idx, name in enumerate(n for n in names if n):\n",
    "            f.write(name + '\\t' + str(idx) + '\\n')\n",
    "\n",
    "\n",
    "def read_id_map(map_path):\n",
    "    \"\"\"Load a ``name<TAB>id`` file into a ``{name: id}`` dict.\n",
    "\n",
    "    Ids are kept as strings, exactly as they appear in the file. If a name\n",
    "    occurs more than once, the last occurrence wins.\n",
    "    \"\"\"\n",
    "    name2id = {}\n",
    "    with open(map_path, 'r', encoding=ENCODING) as f:\n",
    "        for line in f:\n",
    "            stripped = line.strip()\n",
    "            if not stripped:\n",
    "                continue  # skip blank lines\n",
    "            fields = stripped.split('\\t')\n",
    "            name2id[fields[0]] = fields[1]\n",
    "    return name2id\n",
    "\n",
    "\n",
    "def convert_triples(src_path, dst_path, en2id, re2id):\n",
    "    \"\"\"Rewrite tab-separated ``h, r, t`` lines as space-separated id triples.\n",
    "\n",
    "    ``h`` and ``t`` are looked up in ``en2id`` and ``r`` in ``re2id``.\n",
    "    Raises ``KeyError`` if a name is missing from its map and ``IndexError``\n",
    "    if a non-blank line has fewer than three columns.\n",
    "    \"\"\"\n",
    "    ensure_parent_dir(dst_path)\n",
    "    with open(src_path, 'r', encoding=ENCODING) as src, \\\n",
    "            open(dst_path, 'w', encoding=ENCODING) as dst:\n",
    "        for line in src:\n",
    "            stripped = line.strip()\n",
    "            if not stripped:\n",
    "                continue  # skip blank lines\n",
    "            fields = stripped.split('\\t')\n",
    "            h, r, t = fields[0], fields[1], fields[2]\n",
    "            dst.write(en2id[h] + ' ' + re2id[r] + ' ' + en2id[t] + '\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write the entity and relation id maps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "write_id_map(path1, path6)\n",
    "write_id_map(path2, path7)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the id maps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "en2id = read_id_map(path6)\n",
    "re2id = read_id_map(path7)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert the train / dev / test triples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for src_path, dst_path in ((path3, path8), (path4, path9), (path5, path10)):\n",
    "    convert_triples(src_path, dst_path, en2id, re2id)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}