diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1dbc687 --- /dev/null +++ b/.gitignore @@ -0,0 +1,62 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +#Ipython Notebook +.ipynb_checkpoints diff --git a/README.md b/README.md index 7246a09..c9b207c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -#Global WordNet Grid LMF parser +# Global WordNet Grid LMF parser This repo provides a python module to work with Open Dutch WordNet. Please first check the [Issues](https://github.com/MartenPostma/OpenDutchWordnet/issues) to see if your question has already @@ -20,11 +20,11 @@ If you make use of the resource and/or this repository, please cite the followin } ## Demo -A demo for word similarity using Open Dutch WordNet can be found [here](http://130.37.53.15:5000/). +A demo for word similarity using Open Dutch WordNet can be found [here](http://130.37.53.15:5000/). In the background, this uses the [WordNetTools](https://github.com/cltl/WordnetTools/). We encourage to use the module locally when you need to run for many word pairs. -##USAGE AND INSTALL +## USAGE AND INSTALL git clone this repository. The python module 'lxml' is needed. Hopefully, 'pip install lxml' @@ -80,7 +80,7 @@ python 'eng-30-00322847-v' ``` -##Contact +## Contact * Marten Postma * m.c.postma@vu.nl * http://martenpostma.com/ diff --git a/__init__.py b/__init__.py deleted file mode 100644 index 131daf3..0000000 --- a/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -import os -import sys -import subprocess - -cwd = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(cwd) - - -#source virtual env or activate it -#source_command = "source {cwd}/python_env_3.4/bin/activate".format(**locals()) - -#try: -# cmd_output = subprocess.check_output(source_command,shell=True) -#except subprocess.CalledProcessError: -# print ('''please first run 'bash install.sh' from the command line \ -#from inside the module and try again''') - -from wn_grid_parser import Wn_grid_parser - -#documentation attributes -Wn_grid_parser.odwn = os.path.join(cwd, - 'resources', - 'odwn', - 'odwn_orbn_gwg-LMF_1.3.xml.gz') -Wn_grid_parser.orbn = os.path.join(cwd, - 'resources', - 'odwn', - 'orbn_1.0.xml') -Wn_grid_parser.rbn = os.path.join(cwd, - 'resources', - 'odwn', - 'cdb_lu.xml') -Wn_grid_parser.dtd = os.path.join(cwd, - 'resources', - 'odwn', - 'odwn-orbn-lmf.dtd') -Wn_grid_parser.README = open(os.path.join(cwd,"README.md")).read() -Wn_grid_parser.LICENSE = open(os.path.join(cwd,"LICENSE.md")).read() -Wn_grid_parser.__author__ = "Marten Postma" -Wn_grid_parser.__license__ = "CC-BY-SA 4.0" -Wn_grid_parser.__version__ = "1.3" -Wn_grid_parser.__maintainer__ = "Marten Postma" -Wn_grid_parser.__email__ = "martenp@gmail.com" -Wn_grid_parser.__status__ = "development" diff --git a/annotate.sh b/annotate.sh deleted file mode 100644 index 6447281..0000000 --- a/annotate.sh +++ /dev/null @@ -1 +0,0 @@ -python user_input_test.py diff --git a/base_concepts.py b/base_concepts.py deleted file mode 100644 index f7b7d18..0000000 --- a/base_concepts.py +++ /dev/null @@ -1,77 +0,0 @@ -import os -import subprocess -import pickle -from nltk.corpus import wordnet as wn - -here=os.path.dirname(os.path.realpath(__file__)) -resources=os.path.join(here,'resources') - -#downloaded from: http://globalwordnet.org/wp-content/uploads/2013/07/5000_bc.zip -path_base_concepts=os.path.join(resources,'5000_bc.xml') - -#clone repo if needed -wordnet_mapper_git='https://github.com/MartenPostma/WordNetMapper.git' -path_wordnet_mapper=os.path.join('WordNetMapper') - -if not os.path.isdir(path_wordnet_mapper): - clone='git clone '+wordnet_mapper_git - print(subprocess.check_output(clone,shell=True)) - -from WordNetMapper import WordNetMapper -my_mapper = WordNetMapper() - -#obtain base in wordnet 3.0 -base=False -if base: - base_concepts_30 = set() - with open(path_base_concepts) as infile: - for line in infile: - offset_20 = line[18:26] - try: - offset_30,pos = my_mapper.map_offset_to_offset(offset_20, "20", "30") - ili_30 = 'eng-30-{offset_30}-{pos}'.format(**locals()) - base_concepts_30.add(ili_30) - except ValueError: - pass - - with open(os.path.join(resources,'base_concepts_30.bin'),'wb') as outfile: - pickle.dump(base_concepts_30,outfile) - - - -#wordnet synonym dict -synonyms=False -if synonyms: - synonym_dict = {} - for synset in wn.all_synsets(): - offset = str(synset.offset()) - zeros = (8-len(offset))* '0' - - pos = synset.pos() - if pos in ['n','v']: - ili = 'eng-30-{zeros}{offset}-{pos}'.format(**locals()) - lemmas = synset.lemma_names() - synonym_dict[ili] = lemmas - - with open( os.path.join(resources,'synonym_dict.bin'),'wb') as outfile: - pickle.dump(synonym_dict,outfile) - -#create empty base level concepts dict -empty_synsets = set( pickle.load( open( os.path.join(resources,'empty_pwn_synsets.bin'),'rb')) ) -base_concepts_30 = pickle.load( open( os.path.join(resources,'base_concepts_30.bin'),'rb')) -leave_pwn_synsets = pickle.load( open( os.path.join(resources,'leave_pwn_synsets.bin'),'rb')) - -overlap = list ( base_concepts_30 & set(empty_synsets) ) -for sy_id in leave_pwn_synsets: - if sy_id in overlap: - overlap.remove(sy_id) - -half = int( len(overlap) / 2) -part1 = overlap[:half] -part2 = overlap[half:] - -for basename,item in [('empty_base_synsets1.bin',part1), - ('empty_base_synsets2.bin',part2)]: - with open( os.path.join(resources,basename),'wb') as outfile: - pickle.dump(set(item),outfile) - diff --git a/clean.py b/clean.py deleted file mode 100644 index 73c5b75..0000000 --- a/clean.py +++ /dev/null @@ -1,67 +0,0 @@ - - - -class Clean(): - ''' - method to clean resource - ''' - def __init__(self): - pass - - def clean_impossible_relations(self): - ''' - all relations in - self.stats['impossible_rels'] are removed - ''' - self.get_stats() - for rel_el in self.stats['impossible_rels']: - rel_el.remove_me() - print - print("number of impossible relations removed:") - print(len(self.stats['impossible_rels'])) - - def clean_bidirectional_relations(self): - ''' - all proposed relations in self.stats['bidirectional_relations'] - are added - ''' - self.get_stats() - for source,target,reltype in self.stats['bidirectional_relations']: - sy_obj = self.synsets_find_synset(source) - if sy_obj is not None: - print('adding %s %s %s' % (source,reltype,target)) - sy_obj.add_relation(reltype,target) - - print - print("number of bidirectional links fixed") - print(len(self.stats['bidirectional_relations'])) - - - def clean_provenance_to_all_les(self): - ''' - some LexicalEntry elements do not have a provenance tag. - this method adds the "cdb2.2_Auto" tag as provenance - ''' - default = "cdb2.2_Auto" - added = 0 - for le_obj in self.les_get_generator(): - - provenance_tag = le_obj.get_provenance() - - if provenance_tag is None: - added += 1 - le_obj.sense_el.attrib["provenance"] = default - - print("number of Lexical Entries that receiced a default tag:") - print(added) - - def clean_remove_synsets_without_relations(self,list_of_synsets): - ''' - ''' - pass - - def clean_synsets_without_synonyms(self): - ''' - ''' - pass - \ No newline at end of file diff --git a/configuration.py b/configuration.py deleted file mode 100644 index 61d8b97..0000000 --- a/configuration.py +++ /dev/null @@ -1,5 +0,0 @@ - - - -xml_paths = {"path_to_synset_els" : "Lexicon/Synset", - "path_to_le_els" : "Lexicon/LexicalEntry"} \ No newline at end of file diff --git a/create_version_1_1.sh b/create_version_1_1.sh deleted file mode 100644 index 09618d0..0000000 --- a/create_version_1_1.sh +++ /dev/null @@ -1,3 +0,0 @@ -cp version1_1.py ../ -cd .. -python version1_1.py \ No newline at end of file diff --git a/create_version_1_2.sh b/create_version_1_2.sh deleted file mode 100644 index 5998e70..0000000 --- a/create_version_1_2.sh +++ /dev/null @@ -1,4 +0,0 @@ -cp version1_2.py ../ -cd .. -python version1_2.py - diff --git a/create_virtual_env.sh b/create_virtual_env.sh deleted file mode 100644 index 979c267..0000000 --- a/create_virtual_env.sh +++ /dev/null @@ -1,73 +0,0 @@ -#check if enough arguments are passed, else print usage information -if [ $# -eq 0 ]; -then - echo - echo "This script is meant to help check if the correct versions of python and virtualenv are installed." - echo "It will perform checks for this and will try to install external python modules with pip." - echo "If there is any error in one of these steps, the script will exit." - echo - echo "Usage : $0 python_version wanted_virtual_env_version vir_env_dir ext_modules" - echo - echo "python_version : python version (major.minor for example 3.4)" - echo "vir_env_dir : full path (not just name of folder) to virtual environment directory (will be created)" - echo "ext_modules : path to file in which each line contains a module_name (pip install module_name is run)" - exit -1 -fi - -#rename user input to logical variable names -cwd=${PWD#*} -log=$cwd/log -rm -rf $log && mkdir $log - -python_version=$1 -vir_env_dir=$2 -ext_modules=$3 - -function command_check () { - -RETVAL=$? -[ $RETVAL -eq 0 ] && echo $succes -[ $RETVAL -ne 0 ] && echo $failure && echo 'exiting...' && exit -1 - -} -#check if python version is installed -echo -echo "Checking python version" -export succes="Succes: python$python_version is installed" -export failure="Fail: please install python version $python_version" - -python$python_version -c "exit()" -command_check - -#create virtualenv and echo source command to stdout -echo -echo "Creating virtual environment" -virtualenv --python=python$python_version --system-site-packages $vir_env_dir -echo -echo "to activate: source $vir_env_dir/bin/activate" -echo -echo "activating virtualenv" -source $vir_env_dir/bin/activate - -#install external python modules -echo -echo "Installing external python modules" - -while read p -do - export succes="Succes: succesfully installed module $p" - export failure="Failure: error in installing module $p, please inspect $log/$p.log or $log/$p.log for the error log" - pip install $p > $log/$p.log 2> $log/$p.err - command_check - -done < $ext_modules -echo -echo "#############################################################" -echo "it seems that the virtual environment was succesfully created" -echo -echo "virtual environment directory can be found here:" -echo "$vir_env_dir" -echo -echo "to activate run:" -echo "source $vir_env_dir/bin/activate" -echo "to not have to do this everything, add the above command to files like ~/.bash_profile (files that are run on login)" diff --git a/citation.bib b/documentation/citation.bib similarity index 100% rename from citation.bib rename to documentation/citation.bib diff --git a/gwc2016_odwn13.pdf b/documentation/gwc2016_odwn13.pdf similarity index 100% rename from gwc2016_odwn13.pdf rename to documentation/gwc2016_odwn13.pdf diff --git a/html/api-objects.txt b/documentation/html/api-objects.txt similarity index 100% rename from html/api-objects.txt rename to documentation/html/api-objects.txt diff --git a/html/class-tree.html b/documentation/html/class-tree.html similarity index 100% rename from html/class-tree.html rename to documentation/html/class-tree.html diff --git a/html/crarr.png b/documentation/html/crarr.png similarity index 100% rename from html/crarr.png rename to documentation/html/crarr.png diff --git a/html/epydoc.css b/documentation/html/epydoc.css similarity index 100% rename from html/epydoc.css rename to documentation/html/epydoc.css diff --git a/html/epydoc.js b/documentation/html/epydoc.js similarity index 100% rename from html/epydoc.js rename to documentation/html/epydoc.js diff --git a/html/frames.html b/documentation/html/frames.html similarity index 100% rename from html/frames.html rename to documentation/html/frames.html diff --git a/html/help.html b/documentation/html/help.html similarity index 100% rename from html/help.html rename to documentation/html/help.html diff --git a/html/identifier-index.html b/documentation/html/identifier-index.html similarity index 100% rename from html/identifier-index.html rename to documentation/html/identifier-index.html diff --git a/html/index.html b/documentation/html/index.html similarity index 100% rename from html/index.html rename to documentation/html/index.html diff --git a/html/module-tree.html b/documentation/html/module-tree.html similarity index 100% rename from html/module-tree.html rename to documentation/html/module-tree.html diff --git a/html/odwn.clean-module.html b/documentation/html/odwn.clean-module.html similarity index 100% rename from html/odwn.clean-module.html rename to documentation/html/odwn.clean-module.html diff --git a/html/odwn.clean-pysrc.html b/documentation/html/odwn.clean-pysrc.html similarity index 97% rename from html/odwn.clean-pysrc.html rename to documentation/html/odwn.clean-pysrc.html index 92fcc80..893e236 100644 --- a/html/odwn.clean-pysrc.html +++ b/documentation/html/odwn.clean-pysrc.html @@ -57,17 +57,17 @@