From 83114b3d86a7afbb8a97af63b0bf3c421bfeab92 Mon Sep 17 00:00:00 2001
From: "david.vilares"
Date: Wed, 24 May 2017 16:05:01 +0200
Subject: [PATCH 1/4] bist-covington: a non-projective transition-based BIST-parser

---
 bcovington/covington.py~ | 875 +++++++++++
 bcovington/parser.py~ | 0
 bcovington/src/covington.py | 875 +++++++++++
 bcovington/src/covington.pyc | Bin 0 -> 24143 bytes
 bcovington/src/parser.py | 175 +++
 bcovington/src/parser.pyc | Bin 0 -> 6247 bytes
 bcovington/src/utils.py | 265 ++++
 bcovington/src/utils.pyc | Bin 0 -> 8418 bytes
 bcovington/src/utils/conll17_ud_eval.py | 556 +++++++
 bcovington/src/utils/eval.pl | 1827 +++++++++++++++++++++++
 bcovington/src/utils/weights.clas | 11 +
 bcovington/utils.py | 269 ++++
 12 files changed, 4853 insertions(+)
 create mode 100644 bcovington/covington.py~
 create mode 100644 bcovington/parser.py~
 create mode 100644 bcovington/src/covington.py
 create mode 100644 bcovington/src/covington.pyc
 create mode 100644 bcovington/src/parser.py
 create mode 100644 bcovington/src/parser.pyc
 create mode 100644 bcovington/src/utils.py
 create mode 100644 bcovington/src/utils.pyc
 create mode 100644 bcovington/src/utils/conll17_ud_eval.py
 create mode 100644 bcovington/src/utils/eval.pl
 create mode 100644 bcovington/src/utils/weights.clas
 create mode 100644 bcovington/utils.py

diff --git a/bcovington/covington.py~ b/bcovington/covington.py~
new file mode 100644
index 0000000..ec982a9
--- /dev/null
+++ b/bcovington/covington.py~
@@ -0,0 +1,875 @@
+from dynet import *
+from utils_bcovington import read_conll, write_conll, CovingtonConfiguration
+from operator import itemgetter
+from itertools import chain
+from tarjan import tarjan
+import time, random
+import numpy as np
+import os
+import warnings
+
+
+"""
+This module extends the original transition-based BIST-Parser barchybrid:
+
+https://github.com/elikip/bist-parser/blob/master/barchybrid/
+Kiperwasser, E., & Goldberg, Y. (2016). Simple and accurate dependency parsing using bidirectional LSTM feature representations. arXiv preprint arXiv:1603.04351.
+
+
+It has been adapted to support non-projective transition-based dependency parsing,
+using an O(n^2) implementation of the traditional Covington (2001) algorithm,
+following the list-based transition system described in Nivre (2008).
+
+Covington, M. A. (2001). A fundamental algorithm for dependency parsing. In Proceedings of the 39th annual ACM southeast conference (pp. 95-102).
+Nivre, J. (2008). Algorithms for deterministic incremental dependency parsing. Computational Linguistics, 34(4), 513-553.
+
+We also include the O(n) dynamic oracle described in Gomez-Rodriguez and Fernandez-Gonzalez (2015).
+TODO: the current implementation of the oracle is O(n^2).
+
+Gomez-Rodriguez, C., & Fernandez-Gonzalez, D. (2015). An efficient dynamic oracle for unrestricted non-projective parsing. Volume 2: Short Papers, 256.
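+
+As a rough sketch of the transition system as implemented in Predict() and
+Train() below: a configuration keeps a pointer l1 into the already-processed
+left context, a pointer b into the buffer, and the arc set A. Arcs are stored
+as (head, dependent) pairs, and the four transitions update the configuration
+as follows:
+
+    LEFT_ARC  : add (b, l1) to A, then l1 -= 1
+    RIGHT_ARC : add (l1, b) to A, then l1 -= 1
+    NO_ARC    : l1 -= 1
+    SHIFT     : l1 = b, then b += 1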
+ +""" + + + +class CovingtonBILSTM: + + #ACTIVATION FUNCTIONS + TANH = 'tanh' + SIGMOID = 'sigmoid' + RELU = 'relu' + TANH3 = 'tanh3' + + #OPTIMIZERS + SGD="sgd" + MOMENTUM="momentum" + ADAGRAD="adagrad" + ADADELTA="adadelta" + ADAM = "adam" + + #SPECIAL INDEXES + INDEX_WORD_PAD = 1 + INDEX_WORD_INITIAL = 2 + INDEX_POS_PAD = 1 + INDEX_POS_INITIAL = 2 + INIT_WORD_INDEX = 3 + INIT_POS_INDEX = INIT_WORD_INDEX + + INDEX_FEATS_PAD = 1 + INDEX_FEATS_INITIAL= 2 + INIT_FEATS_INDEX = INIT_WORD_INDEX + + #TRANSITIONS + LEFT_ARC = 0 + RIGHT_ARC = 1 + SHIFT = 2 + NO_ARC = 3 + TRANSITIONS = [LEFT_ARC, RIGHT_ARC, SHIFT, NO_ARC] + + #OTHER HYPERPARAMETERS + SIZE_TRANSITIONS = len(TRANSITIONS) + + def __init__(self, words, lemmas, cpos, pos, feats, rels, w2i, l2i, options, path_oov_external_embedding=None, + pretrained=False): + + self.model = Model() + if options.optimizer == self.ADAM: + self.trainer = AdamTrainer(self.model) + elif options.optimizer == self.SGD: + self.trainer = SimpleSGDTrainer(self.model) + elif options.optimizer == self.MOMENTUM: + self.trainer = MomentumSGDTrainer(self.model) + elif options.optimizer == self.ADAGRAD: + self.trainer = AdagradTrainer(self.model) + elif options.optimizer == self.ADADELTA: + self.trainer = AdadeltaTrainer(self.model) + else: + raise NotImplementedError("Selected optimizer is not available") + + random.seed(1) + + self.activations = {self.TANH: tanh, + self.SIGMOID: logistic, + self.RELU: rectify, + self.TANH3: (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))} + + self.activation = self.activations[options.activation] + + self.oracle = options.oracle + + + self.ldims = options.lstm_dims * 2 #*2 because it is a bi-lstm + self.wdims = options.wembedding_dims + self.pdims = options.pembedding_dims + self.rdims = options.rembedding_dims + self.layers = options.lstm_layers + self.wordsCount = words + + self.vocab = {word: ind+self.INIT_WORD_INDEX for word, ind in w2i.iteritems()} + self.lemmas = {lemma: ind+self.INIT_WORD_INDEX for lemma,ind in l2i.iteritems()} + self.cpos = {cpos: ind+self.INIT_POS_INDEX for ind, cpos in enumerate(cpos)} + self.pos = {pos: ind+self.INIT_POS_INDEX for ind, pos in enumerate(pos)} + self.feats = {f: ind+self.INIT_FEATS_INDEX for ind, f in enumerate(feats)} + self.rels = {word: ind for ind, word in enumerate(rels)} + + #List of dependency types + self.irels = rels + + self.headFlag = options.headFlag + self.rlMostFlag = options.rlMostFlag + self.rlFlag = options.rlFlag + self.kb = options.window_b + self.kl1 = options.window_l1 + self.kl2_r = options.window_l2r + self.kl2_l = options.window_l2l + + self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0) + + #Reading external embedding files, if they exists + + #INFORMATION FOR EXTERNAL WORD EMBEDDINGS + self.external_embedding = None + self.edim = None + self.noextrn = None + self.extrnd = None + self.elookup = None + if options.external_embedding is not None and os.path.exists(options.external_embedding): + self.external_embedding, self.edim,self.noextrn,self.extrnd, self.elookup = self._assign_external_embeddings(options.external_embedding, + self.INDEX_WORD_PAD, self.INDEX_WORD_INITIAL) + else: + warnings.warn("Not using any external file for FORM embeddings") + + #INFORMATION FOR THE EXTERNAL CPOSTAG EMBEDDINGS + self.cpos_external_embedding = None + self.cpos_edim = None + self.cpos_noextrn = None + self.cpos_extrnd = None + self.cpos_elookup = None + if options.cpos_external_embedding is not None and 
os.path.exists(options.cpos_external_embedding): + self.cpos_external_embedding, self.cpos_edim,self.cpos_noextrn,self.cpos_extrnd, self.cpos_elookup = self._assign_external_embeddings(options.cpos_external_embedding, + self.INDEX_POS_PAD, self.INDEX_POS_INITIAL) + else: + warnings.warn("Not using any external file for CPOSTAG embeddings") + + #INFORMATION FOR THE EXTERNAL POSTAG EMBEDDINGS + self.pos_external_embedding = None + self.pos_edim = None + self.pos_noextrn = None + self.pos_extrnd = None + self.pos_elookup= None + if options.pos_external_embedding is not None and os.path.exists(options.pos_external_embedding): + self.pos_external_embedding, self.pos_edim,self.pos_noextrn,self.pos_extrnd, self.pos_elookup = self._assign_external_embeddings(options.pos_external_embedding, + self.INDEX_POS_PAD, self.INDEX_POS_INITIAL) + else: + warnings.warn("Not using any external file for POSTAG embeddings") + + #INFORMATION FOR THE EXTERNAL FEATS EMBEDDINGS + self.feats_external_embedding = None + self.feats_edim = None + self.feats_noextrn = None + self.feats_extrnd = None + self.feats_elookup= None + + if options.feats_external_embedding is not None and os.path.exists(options.feats_external_embedding): + self.feats_external_embedding, self.feats_edim,self.feats_noextrn,self.feats_extrnd, self.feats_elookup = self._assign_external_embeddings(options.feats_external_embedding, self.INDEX_FEATS_PAD, self.INDEX_FEATS_INITIAL) + else: + warnings.warn("Not using any external file for FEATS embeddings") + + + #INFORMATION FOR THE EXTERNAL FEATS EMBEDDINGS +# self.lemmas_external_embedding = None +# self.lemmas_edim = None +# self.lemmas_noextrn = None +# self.lemmas_extrnd = None +# self.lemmas_elookup= None + +# if options.lemmas_external_embedding is not None and os.path.exists(options.lemmas_external_embedding): +# self.lemmas_external_embedding, self.lemmas_edim,self.lemmas_noextrn,self.lemmas_extrnd, self.lemmas_elookup = self._assign_external_embeddings(options.lemmas_external_embedding, self.INDEX_FEATS_PAD, self.INDEX_FEATS_INITIAL) +# else: +# warnings.warn("Not using any external file for LEMMAS embeddings") + + + + + self.oov_external_embedding = None + self.oov_edim = None + self.oov_noextrn = None + self.oov_extrnd = None + self.oov_elookup = None + + + if path_oov_external_embedding is not None and os.path.exists(options.feats_external_embedding): + self.oov_external_embedding, self.oov_edim,self.oov_noextrn,self.oov_extrnd, self.oov_elookup = self._assign_external_embeddings(path_oov_external_embedding, + self.INDEX_WORD_PAD, self.INDEX_WORD_INITIAL) + + if self.oov_external_embedding is not None and self.oov_edim != self.edim: + raise ValueError("The dimensions of the embeddings for OOV words is not equal to the dimension of the rest of external word embeddings (self.oov_edim != self.edim)") + + #Obtaining the dimension of the input + dims = (self.wdims + self.pdims + (self.edim if self.external_embedding is not None else 0) + + (self.cpos_edim if self.cpos_external_embedding is not None else 0) + + (self.pos_edim if self.pos_external_embedding is not None else 0)+ + (self.feats_edim if self.feats_external_embedding is not None else 0) +# + +# (self.lemmas_edim if self.lemmas_external_embedding is not None else 0) + ) + + + #Initialization of the architecture + + self.blstmFlag = options.blstmFlag + self.bibiFlag = options.bibiFlag + + if self.bibiFlag: + self.surfaceBuilders = [VanillaLSTMBuilder(1, dims, self.ldims * 0.5, self.model), + VanillaLSTMBuilder(1, dims, self.ldims * 0.5, 
self.model)] + self.bsurfaceBuilders = [VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model), + VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model)] + elif self.blstmFlag: + if self.layers > 0: + self.surfaceBuilders = [VanillaLSTMBuilder(self.layers, dims, self.ldims * 0.5, self.model), LSTMBuilder(self.layers, dims, self.ldims * 0.5, self.model)] + else: + self.surfaceBuilders = [SimpleRNNBuilder(1, dims, self.ldims * 0.5, self.model), LSTMBuilder(1, dims, self.ldims * 0.5, self.model)] + + + self.hidden_units = options.hidden_units + self.hidden2_units = options.hidden2_units + self.vocab['*PAD*'] = self.INDEX_WORD_PAD + self.cpos['*PAD*'] = self.INDEX_POS_PAD + self.feats['*PAD*'] = self.INDEX_FEATS_PAD + + self.vocab['*INITIAL*'] = self.INDEX_WORD_INITIAL + self.cpos['*INITIAL*'] = self.INDEX_POS_INITIAL + self.feats['*INITIAL*'] = self.INDEX_FEATS_INITIAL + + self.wlookup = self.model.add_lookup_parameters((len(words) + self.INIT_WORD_INDEX, self.wdims)) + self.plookup = self.model.add_lookup_parameters((len(cpos) + self.INIT_POS_INDEX, self.pdims)) + self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims)) + + + self.word2lstm = self.model.add_parameters((self.ldims, dims)) + + self.word2lstmbias = self.model.add_parameters((self.ldims)) + self.lstm2lstm = self.model.add_parameters((self.ldims, self.ldims * self.nnvecs + self.rdims)) + self.lstm2lstmbias = self.model.add_parameters((self.ldims)) + + self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * self.nnvecs * (self.kl1 + self.kl2_l + self.kl2_r + self.kb))) + self.hidBias = self.model.add_parameters((self.hidden_units)) + + self.hid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units)) + self.hid2Bias = self.model.add_parameters((self.hidden2_units)) + + self.outLayer = self.model.add_parameters((self.SIZE_TRANSITIONS, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units)) + self.outBias = self.model.add_parameters((self.SIZE_TRANSITIONS)) + + self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * self.nnvecs * (self.kl1 + self.kl2_l + self.kl2_r + self.kb))) + self.rhidBias = self.model.add_parameters((self.hidden_units)) + + self.rhid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units)) + self.rhid2Bias = self.model.add_parameters((self.hidden2_units)) + + self.routLayer = self.model.add_parameters((2 * (len(self.irels) + 0) + 1, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units)) + self.routBias = self.model.add_parameters((2 * (len(self.irels) + 0) + 1)) + + self.pretrained = pretrained + + + def _assign_external_embeddings(self,option_external_embedding, + index_pad,index_initial): + """ + Reads an external embedding file + Returns: + external_embedding: A dictionary of key:embedding + edim: Dimension of the embedding + noextrn: ?? 
+ extrnd: Index for each key + elookup: Parameter lookup + """ + + + if option_external_embedding is not None: + + external_embedding_fp = open(option_external_embedding,'r') + external_embedding_fp.readline() + + external_embedding = {line.split(' ')[0] : [float(f) for f in line.strip().split(' ')[1:]] + for line in external_embedding_fp} + + + external_embedding_fp.close() + + edim = len(external_embedding.values()[0]) + noextrn = [0.0 for _ in xrange(edim)] + extrnd = {element: i + self.INIT_POS_INDEX + for i, element in enumerate(external_embedding)} + elookup = self.model.add_lookup_parameters((len(external_embedding) + self.INIT_WORD_INDEX, edim)) + + for element, i in extrnd.iteritems(): + elookup.init_row(i, external_embedding[element]) + extrnd['*PAD*'] = index_pad + extrnd['*INITIAL*'] = index_initial + + return external_embedding, edim, noextrn, extrnd, elookup + + return None,None,None,None,None + + + + def __evaluate(self, c, train): + """ + @param c: A CovingtonConfiguration instance + @param train: True if used in the training phase, False otherwise + Returns the scores for all possible transitions (training) + or the top ones (testing) for a given configuration c + """ + + #Gets the embeddings for the terms to be used in the prediction + top_l1 = [c.sentence[c.l1-i].lstms if c.l1 - i > 0 else [self.empty] for i in xrange(self.kl1)] + top_l2l = [c.sentence[c.l1+1+i].lstms if c.l1+1+i < c.b else [self.empty] for i in xrange(self.kl2_l)] + top_l2r = [c.sentence[c.b-i].lstms if c.b-i > c.l1 else [self.empty] for i in xrange(self.kl2_r)] + topBuffer = [c.sentence[c.b+i-1].lstms if c.b+i-1 <= c.sentence[-1].id else [self.empty] for i in xrange(self.kb)] + + input = concatenate(list(chain(*(top_l1 + top_l2l + top_l2r + topBuffer)))) + + if self.hidden2_units > 0: + routput = (self.routLayer.expr() * self.activation(self.rhid2Bias.expr() + self.rhid2Layer.expr() * self.activation(self.rhidLayer.expr() * input + self.rhidBias.expr())) + self.routBias.expr()) + else: + routput = (self.routLayer.expr() * self.activation(self.rhidLayer.expr() * input + self.rhidBias.expr()) + self.routBias.expr()) + + if self.hidden2_units > 0: + output = (self.outLayer.expr() * self.activation(self.hid2Bias.expr() + self.hid2Layer.expr() * self.activation(self.hidLayer.expr() * input + self.hidBias.expr())) + self.outBias.expr()) + else: + output = (self.outLayer.expr() * self.activation(self.hidLayer.expr() * input + self.hidBias.expr()) + self.outBias.expr()) + + scrs, uscrs = routput.value(), output.value() + + if train: + left_arc_info = [(rel,self.LEFT_ARC, scrs[1+j*2] + uscrs[self.LEFT_ARC], routput[1+j*2]+ output[self.LEFT_ARC]) + for j, rel in enumerate(self.irels) if c.l1 > 0 and c.l1 < c.b and c.b <= c.sentence[-1].id] + + right_arc_info = [(rel,self.RIGHT_ARC, scrs[2+j*2] + uscrs[self.RIGHT_ARC], routput[2+j*2]+ output[self.RIGHT_ARC]) + for j, rel in enumerate(self.irels) if c.l1 >= 0 and c.l1 < c.b and c.b <= c.sentence[-1].id] + + shift_info = [ (None, self.SHIFT, scrs[0] + uscrs[self.SHIFT], routput[0] + output[self.SHIFT]) ] if c.b <= c.sentence[-1].id else [] + + no_arc_info = [(None, self.NO_ARC,scrs[3] + uscrs[self.NO_ARC], routput[3] + output[self.NO_ARC] )] if c.l1> 0 and c.b <= c.sentence[-1].id else [] + + ret = [left_arc_info,right_arc_info, shift_info, no_arc_info] + + else: + #It is done different from the 'train' phase, due to the dynamic oracle. 
+ #In the test phase we already pick the most likely transition/dependency instead of returning them all + #and then selecting one according to the prediction of the dynamic oracle + sLEFT,rLEFT = max(zip(scrs[1::2],self.irels)) + sRIGHT,rRIGHT = max(zip(scrs[2::2],self.irels)) + sLEFT += uscrs[self.LEFT_ARC] + sRIGHT += uscrs[self.RIGHT_ARC] + ret = [ [(rLEFT, self.LEFT_ARC, sLEFT) ] if (c.l1 > 0 and c.l1 < c.b and c.b <= c.sentence[-1].id and self._is_valid_left_arc(c)) else [], + [(rRIGHT, self.RIGHT_ARC, sRIGHT) ] if (c.l1 >= 0 and c.l1 < c.b and c.b <= c.sentence[-1].id and self._is_valid_right_arc(c)) else [], + [(None, self.SHIFT, scrs[0] + uscrs[self.SHIFT]) ] if (c.b <= c.sentence[-1].id) else [], + [(None, self.NO_ARC,scrs[3] + uscrs[self.NO_ARC]) ] if (c.l1 > 0 and c.b <= c.sentence[-1].id) else [] + ] + return ret + + + def Save(self, filename): + self.model.save(filename) + + + def Load(self, filename): + self.model.load(filename) + + def Init(self): + evec = self.elookup[1] if self.external_embedding is not None else None + cpos_evec = self.cpos_elookup[1] if self.cpos_external_embedding is not None else None + pos_evec = self.pos_elookup[1] if self.pos_external_embedding is not None else None + feats_evec = self.feats_elookup[1] if self.feats_external_embedding is not None else None + # lemmas_evec = self.lemmas_elookup[1] if self.lemmas_external_embedding is not None else None + paddingWordVec = self.wlookup[1] + paddingPosVec = self.plookup[1] if self.pdims > 0 else None + # paddingVec = tanh(self.word2lstm.expr() * concatenate(filter(None, [paddingWordVec, paddingPosVec, evec, cpos_evec, pos_evec, feats_evec, lemmas_evec])) + self.word2lstmbias.expr()) + paddingVec = tanh(self.word2lstm.expr() * concatenate(filter(None, [paddingWordVec, paddingPosVec, evec, cpos_evec, pos_evec, feats_evec])) + self.word2lstmbias.expr()) + self.empty = paddingVec if self.nnvecs == 1 else concatenate([paddingVec for _ in xrange(self.nnvecs)]) + + + def getWordEmbeddings(self, sentence, train): + """ + Gets the embeddings (also external) for every term in a sentence + Returns a vector of all embeddings concatenated + """ + + for root in sentence: + c = float(self.wordsCount.get(root.norm, 0)) + dropFlag = not train or (random.random() < (c/(0.25+c))) + sys.stdout.flush() + root.wordvec = self.wlookup[int(self.vocab.get(root.norm, 0)) if dropFlag else 0] + root.cposvec = self.plookup[int(self.cpos.get(root.cpos,0))] if self.pdims > 0 else None + + #For word embeddings + if self.external_embedding is not None: + if root.form in self.external_embedding: + root.evec = self.elookup[self.extrnd[root.form]] + elif root.norm in self.external_embedding: + root.evec = self.elookup[self.extrnd[root.norm]] + else: + if (self.oov_external_embedding is not None and root.form.replace(" ","_") in self.oov_external_embedding): + root.evec = self.oov_elookup[self.oov_extrnd[root.form.replace(" ","_")]] + else: + root.evec = self.elookup[0] + else: + root.evec = None + + #For cpostag embeddings + if self.cpos_external_embedding is not None: + if root.cpos in self.cpos_external_embedding: + root.cposevec = self.cpos_elookup[self.cpos_extrnd[root.cpos]] + else: + root.cposevec = self.cpos_elookup[0] + else: + root.cposevec = None + + #For postag embeddings + if self.pos_external_embedding is not None: + if root.pos in self.pos_external_embedding: + root.posevec = self.pos_elookup[self.pos_extrnd[root.pos]] + else: + root.posevec = self.pos_elookup[0] + else: + root.posevec = None +# + #For feats embeddings + if 
self.feats_external_embedding is not None: + if root.feats in self.feats_external_embedding: + root.featsevec = self.feats_elookup[self.feats_extrnd[root.feats]] + else: + root.featsevec = self.feats_elookup[0] + else: + root.featsevec = None + + + #For lemmas embeddings +# if self.lemmas_external_embedding is not None: +# if root.lemma in self.lemmas_external_embedding: +# root.lemmasevec = self.lemmas_elookup[self.lemmas_extrnd[root.lemma]] +# else: +# root.lemmasevec = self.lemmas_elookup[0] +# else: +# root.lemmasevec = None + + + # root.ivec = concatenate(filter(None, [root.wordvec, root.cposvec, root.evec, root.cposevec, root.posevec, root.featsevec, root.lemmasevec])) + root.ivec = concatenate(filter(None, [root.wordvec, root.cposvec, root.evec, root.cposevec, root.posevec, root.featsevec])) + + if self.blstmFlag: + forward = self.surfaceBuilders[0].initial_state() + backward = self.surfaceBuilders[1].initial_state() + + for froot, rroot in zip(sentence, reversed(sentence)): + forward = forward.add_input( froot.ivec ) + backward = backward.add_input( rroot.ivec ) + froot.fvec = forward.output() + rroot.bvec = backward.output() + for root in sentence: + root.vec = concatenate( [root.fvec, root.bvec] ) + + if self.bibiFlag: + bforward = self.bsurfaceBuilders[0].initial_state() + bbackward = self.bsurfaceBuilders[1].initial_state() + + for froot, rroot in zip(sentence, reversed(sentence)): + bforward = bforward.add_input( froot.vec ) + bbackward = bbackward.add_input( rroot.vec ) + froot.bfvec = bforward.output() + rroot.bbvec = bbackward.output() + for root in sentence: + root.vec = concatenate( [root.bfvec, root.bbvec] ) + + else: + for root in sentence: + root.ivec = (self.word2lstm.expr() * root.ivec) + self.word2lstmbias.expr() + root.vec = tanh( root.ivec ) + + + def Predict(self, conll_path): + """ + Makes non-projective depending parsing prediction given a ConLL-X file + """ + + + with open(conll_path, 'r') as conllFP: + for iSentence, sentence in enumerate(read_conll(conllFP)): + self.Init() + + l1 = sentence[0].id + b = sentence[1].id + arcs = set([]) + + self.getWordEmbeddings(sentence, False) + + for root in sentence: + root.lstms = [root.vec for _ in xrange(self.nnvecs)] + + hoffset = 1 if self.headFlag else 0 + + c = CovingtonConfiguration(l1,b,sentence,arcs) + while not self._is_final_state(b,sentence): + + transition_scores = self.__evaluate(c, False) + + + best = max(chain(*transition_scores), key = itemgetter(2) ) + + if best[1] == self.LEFT_ARC: + + sentence[l1].pred_parent_id = sentence[b].id + sentence[l1].pred_relation = best[0] + best_op = self.LEFT_ARC + if self.rlMostFlag: + sentence[b].lstms[best_op+hoffset] = sentence[l1].lstms[best_op+hoffset] + if self.rlFlag: + sentence[b].lstms[best_op+hoffset] = sentence[l1].vec + + arcs.add((b,l1)) + l1 = l1 -1 + + elif best[1] == self.RIGHT_ARC: + + sentence[b].pred_parent_id = sentence[l1].id + sentence[b].pred_relation = best[0] + + best_op = self.RIGHT_ARC + if self.rlMostFlag: + sentence[l1].lstms[best_op+hoffset] = sentence[b].lstms[best_op+hoffset] + if self.rlFlag: + sentence[l1].lstms[best_op+hoffset] = sentence[b].vec + + arcs.add((l1,b)) + l1 = l1-1 + + elif best[1] == self.SHIFT: + l1 = b + b = b + 1 + + + elif best[1] == self.NO_ARC: + l1 = l1 - 1 + + c = CovingtonConfiguration(l1,b,sentence,arcs) + renew_cg() + yield sentence + + + def Train(self, conll_path): + """ + Trains a O(n^2) Covington's parser with a O(n^2) dynamic oracle + """ + mloss = 0.0 + errors = 0 + batch = 0 + eloss = 0.0 + eerrors = 0 + 
lerrors = 0 + etotal = 0 + ltotal = 0 + ninf = -float('inf') + + hoffset = 1 if self.headFlag else 0 + + start = time.time() + + with open(conll_path, 'r') as conllFP: + shuffledData = list(read_conll(conllFP)) + + random.shuffle(shuffledData) + + + errs = [] + eeloss = 0.0 + + self.Init() + + for iSentence, sentence in enumerate(shuffledData): + if iSentence % 100 == 0 and iSentence != 0: + print 'Processing sentence number:', iSentence, 'Loss:', eloss / etotal, 'Errors:', (float(eerrors)) / etotal, 'Labeled Errors:', (float(lerrors) / etotal) , 'Time', time.time()-start + start = time.time() + eerrors = 0 + eloss = 0.0 + etotal = 0 + lerrors = 0 + ltotal = 0 + + self.getWordEmbeddings(sentence, True) + #We obtain the gold arcs to then compute the dynamic oracle for covington + gold_arcs = set([]) + for word in sentence: + + #TODO: Weird error if not, adds and arc (0,0) + if word.id != word.parent_id: + gold_arcs.add((word.parent_id,word.id)) + + + l1 = sentence[0].id + b = sentence[1].id + arcs = set([]) + c = CovingtonConfiguration(l1,b,sentence,arcs) + loss_c = self._loss(c,gold_arcs, iSentence) + + for word in sentence: + word.lstms = [word.vec for _ in xrange(self.nnvecs)] + + hoffset = 1 if self.headFlag else 0 + + while not self._is_final_state(b,sentence): + + costs = [None,None,None,None] + transition_scores = self.__evaluate(c, True) + + #We determine if the transitions are valid for a given configuration c + for t in self.TRANSITIONS: + + l1_aux = l1 + b_aux = b + arcs_aux = set(arcs) + valid_transition = False + + if t == self.LEFT_ARC and self._is_valid_left_arc(c): + arcs_aux.add((b_aux,l1_aux)) + l1_aux = l1_aux -1 + valid_transition = True + + if t == self.RIGHT_ARC and l1 >=0 and self._is_valid_right_arc(c): + arcs_aux.add((l1_aux,b_aux)) + l1_aux = l1_aux-1 + valid_transition = True + + if t == self.NO_ARC and l1 >0: + l1_aux = l1_aux-1 + valid_transition = True + + if t == self.SHIFT: + l1_aux = b_aux + b_aux = b_aux + 1 + valid_transition = True + + if valid_transition: + + new_c = CovingtonConfiguration(l1_aux,b_aux,sentence,arcs_aux) + loss_new_c = self._loss(new_c,gold_arcs,iSentence) + + cost = loss_new_c - loss_c + costs[t] = float(cost) + + #Valid transitions are those with cost 0 + #If it is a LEFT/RIGHT arc, also the relation must match with the one in gold standard + valid_transitions = [s for s in chain(*transition_scores) if costs[s[1]] == 0 and (s[1] in [self.SHIFT,self.NO_ARC] + or ((s[1] == self.LEFT_ARC and s[0] == sentence[l1].relation) + or (s[1] == self.RIGHT_ARC and s[0] == sentence[b].relation)))] + + best_valid = max(valid_transitions, key=itemgetter(2)) + + wrong_transitions = [s for s in chain(*transition_scores) if costs[s[1]] is not None and ( (costs[s[1]] != 0) or (s[1] in [self.LEFT_ARC,self.RIGHT_ARC] + and ((s[1] == self.LEFT_ARC and s[0] != sentence[l1].relation) + or (s[1] == self.RIGHT_ARC and s[0] != sentence[b].relation))) ) ] + + #Aggressive exploration as done by Kiperwasser and Golberg (2016) + if wrong_transitions != []: + best_wrong = max(wrong_transitions, key=itemgetter(2)) + + best = best_valid if ( (not self.oracle) or (best_valid[2] - best_wrong[2] > 1.0) + or (best_valid[2] > best_wrong[2] and random.random() > 0.1) ) else best_wrong + else: + best = best_valid + + + #Moving a new configuration based on the "best" choice + if best[1] == self.LEFT_ARC: + + sentence[l1].pred_parent_id = sentence[b].id + sentence[l1].pred_relation = best[0] + + best_op = self.LEFT_ARC + if self.rlMostFlag: + sentence[b].lstms[best_op+hoffset] = 
sentence[l1].lstms[best_op+hoffset] + if self.rlFlag: + sentence[b].lstms[best_op+hoffset] = sentence[l1].vec + + child = sentence[l1] + arcs.add((b,l1)) + l1 = l1 -1 + + elif best[1] == self.RIGHT_ARC: + + + sentence[b].pred_parent_id = sentence[l1].id + sentence[b].pred_relation = best[0] + + best_op = self.RIGHT_ARC + if self.rlMostFlag: + sentence[l1].lstms[best_op+hoffset] = sentence[b].lstms[best_op+hoffset] + if self.rlFlag: + sentence[l1].lstms[best_op+hoffset] = sentence[b].vec + + arcs.add((l1,b)) + child = sentence[b] + l1 = l1-1 + + + elif best[1] == self.SHIFT: + l1 = b + child = sentence[b] + b = b + 1 + + + elif best[1] == self.NO_ARC: + l1 = l1 - 1 + child = sentence[l1] + + + if best_valid[2] < best_wrong[2] + 1.0: + loss = best_wrong[3] - best_valid[3] + mloss += 1.0 + best_wrong[2] - best_valid[2] + eloss += 1.0 + best_wrong[2] - best_valid[2] + errs.append(loss) + + + if best[1] not in [self.SHIFT, self.NO_ARC] and (child.pred_parent_id != child.parent_id or child.pred_relation != child.relation): + lerrors += 1 + if child.pred_parent_id != child.parent_id: + errors += 1 + eerrors += 1 + + etotal += 1 + c = CovingtonConfiguration(l1,b,sentence,arcs) + loss_c = self._loss(c,gold_arcs, iSentence) + + + if len(errs) > 50: + eerrs = esum(errs) + scalar_loss = eerrs.scalar_value() + eerrs.backward() + self.trainer.update() + errs = [] + lerrs = [] + + renew_cg() + self.Init() + + if len(errs) > 0: + eerrs = (esum(errs)) # * (1.0/(float(len(errs)))) + eerrs.scalar_value() + eerrs.backward() + self.trainer.update() + + errs = [] + lerrs = [] + + renew_cg() + + self.trainer.update_epoch() + print "Loss: ", mloss/iSentence + + + def _is_final_state(self,b,sentence): + return b >= len(sentence) + + + def _is_valid_left_arc(self,c): + + aux = set(c.A) + aux.add((c.b,c.l1)) + l1_has_head = self._y_has_head(c.A, c.b, c.l1) + return (c.l1 > 0 and not l1_has_head + and self._count_cycles(aux) == 0) + + + def _is_valid_right_arc(self,c): + + b_has_head = self._y_has_head(c.A, c.l1, c.b) + aux = set(c.A) + aux.add((c.l1,c.b)) + return ((not b_has_head) and self._count_cycles(aux) == 0) + + + """ + Gomez-Rodriguez & Fernandez-Gonzalez: + An Efficiente Dynamic Oracle for Unrestricted Non-Projective Parsing (ACL,2015) + Algorithm 1 + """ + def _loss(self, c, gold_arcs, iSentence): + + U = set([]) #set of unreachable nodes + non_built_arcs = gold_arcs.difference(c.A) + + + i = c.l1 + j = c.b + + for x,y in non_built_arcs: + left = min(x,y) #O(n) + right = max(x,y) #O(n) + if (j > right or (j==right and i < left) or self._y_has_head(c.A,x,y) + or self._weakly_connected(c.A, x, y,c, gold_arcs)): + U.add((x,y)) + + I = gold_arcs.difference(U) + + return len(U) + self._count_cycles( c.A.union(I)) + + + #TODO: This can be done more efficient + #O(n^2) + def _weakly_connected(self,A,x,y,c, gold_arcs): + + weakly_connected = False + end_path = False + parent = x + + while parent != 0 and not weakly_connected and not end_path and A != set([]): + if (parent,y) in A: + weakly_connected = True + break + else: + + for (a,b) in A: + if b == parent: + parent = a + break + else: + end_path = True + + + return weakly_connected + + + """ + Tarjan (1972) implementation at https://github.com/bwesterb/py-tarjan/ + O(n) + """ + def _count_cycles(self, A): + + d = {} + for a,b in A: + if a not in d: + d[a] = [b] + else: + d[a].append(b) + + return sum([1 for e in tarjan(d) if len(e) > 1]) + + + """ + Determines if node y has already a head + """ + #O(n) + def _y_has_head(self,A,x,y): + + for z,y_prime in A: + if 
y_prime == y and z != x:
+                return True
+        return False
+
+    #O(n)
+#    def violates_single_root(self, A):
+#        print A,[1 for (h,d) in A if h==0], len([1 for (h,d) in A if h==0]) != 0
+#        return len([1 for (h,d) in A if h==0]) != 0
+
diff --git a/bcovington/parser.py~ b/bcovington/parser.py~
new file mode 100644
index 0000000..e69de29
diff --git a/bcovington/src/covington.py b/bcovington/src/covington.py
new file mode 100644
index 0000000..e29dae5
--- /dev/null
+++ b/bcovington/src/covington.py
@@ -0,0 +1,875 @@
+from dynet import *
+from utils import read_conll, write_conll, CovingtonConfiguration
+from operator import itemgetter
+from itertools import chain
+from tarjan import tarjan
+import time, random
+import numpy as np
+import os
+import warnings
+
+
+"""
+This module extends the original transition-based BIST-Parser barchybrid:
+
+https://github.com/elikip/bist-parser/blob/master/barchybrid/
+Kiperwasser, E., & Goldberg, Y. (2016). Simple and accurate dependency parsing using bidirectional LSTM feature representations. arXiv preprint arXiv:1603.04351.
+
+
+It has been adapted to support non-projective transition-based dependency parsing,
+using an O(n^2) implementation of the traditional Covington (2001) algorithm,
+following the list-based transition system described in Nivre (2008).
+
+Covington, M. A. (2001). A fundamental algorithm for dependency parsing. In Proceedings of the 39th annual ACM southeast conference (pp. 95-102).
+Nivre, J. (2008). Algorithms for deterministic incremental dependency parsing. Computational Linguistics, 34(4), 513-553.
+
+We also include the O(n) dynamic oracle described in Gomez-Rodriguez and Fernandez-Gonzalez (2015).
+TODO: the current implementation of the oracle is O(n^2).
+
+Gomez-Rodriguez, C., & Fernandez-Gonzalez, D. (2015). An efficient dynamic oracle for unrestricted non-projective parsing. Volume 2: Short Papers, 256.
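+
+A rough sketch of the oracle loss used below (see _loss() and Train(); it
+follows Algorithm 1 of Gomez-Rodriguez and Fernandez-Gonzalez, 2015):
+
+    loss(c) = |U| + cycles(A union I)
+
+where A is the set of arcs already built in configuration c, U is the set of
+gold arcs that can no longer be built from c, and I is the set of remaining
+gold arcs. During training each candidate transition t gets
+cost(t) = loss(c') - loss(c), where c' is the configuration reached by t;
+transitions with zero cost (and, for arc transitions, the gold label) are
+treated as correct by the oracle.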
+ +""" + + + +class CovingtonBILSTM: + + #ACTIVATION FUNCTIONS + TANH = 'tanh' + SIGMOID = 'sigmoid' + RELU = 'relu' + TANH3 = 'tanh3' + + #OPTIMIZERS + SGD="sgd" + MOMENTUM="momentum" + ADAGRAD="adagrad" + ADADELTA="adadelta" + ADAM = "adam" + + #SPECIAL INDEXES + INDEX_WORD_PAD = 1 + INDEX_WORD_INITIAL = 2 + INDEX_POS_PAD = 1 + INDEX_POS_INITIAL = 2 + INIT_WORD_INDEX = 3 + INIT_POS_INDEX = INIT_WORD_INDEX + + INDEX_FEATS_PAD = 1 + INDEX_FEATS_INITIAL= 2 + INIT_FEATS_INDEX = INIT_WORD_INDEX + + #TRANSITIONS + LEFT_ARC = 0 + RIGHT_ARC = 1 + SHIFT = 2 + NO_ARC = 3 + TRANSITIONS = [LEFT_ARC, RIGHT_ARC, SHIFT, NO_ARC] + + #OTHER HYPERPARAMETERS + SIZE_TRANSITIONS = len(TRANSITIONS) + + def __init__(self, words, lemmas, cpos, pos, feats, rels, w2i, l2i, options, path_oov_external_embedding=None, + pretrained=False): + + self.model = Model() + if options.optimizer == self.ADAM: + self.trainer = AdamTrainer(self.model) + elif options.optimizer == self.SGD: + self.trainer = SimpleSGDTrainer(self.model) + elif options.optimizer == self.MOMENTUM: + self.trainer = MomentumSGDTrainer(self.model) + elif options.optimizer == self.ADAGRAD: + self.trainer = AdagradTrainer(self.model) + elif options.optimizer == self.ADADELTA: + self.trainer = AdadeltaTrainer(self.model) + else: + raise NotImplementedError("Selected optimizer is not available") + + random.seed(1) + + self.activations = {self.TANH: tanh, + self.SIGMOID: logistic, + self.RELU: rectify, + self.TANH3: (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))} + + self.activation = self.activations[options.activation] + + self.oracle = options.oracle + + + self.ldims = options.lstm_dims * 2 #*2 because it is a bi-lstm + self.wdims = options.wembedding_dims + self.pdims = options.pembedding_dims + self.rdims = options.rembedding_dims + self.layers = options.lstm_layers + self.wordsCount = words + + self.vocab = {word: ind+self.INIT_WORD_INDEX for word, ind in w2i.iteritems()} + self.lemmas = {lemma: ind+self.INIT_WORD_INDEX for lemma,ind in l2i.iteritems()} + self.cpos = {cpos: ind+self.INIT_POS_INDEX for ind, cpos in enumerate(cpos)} + self.pos = {pos: ind+self.INIT_POS_INDEX for ind, pos in enumerate(pos)} + self.feats = {f: ind+self.INIT_FEATS_INDEX for ind, f in enumerate(feats)} + self.rels = {word: ind for ind, word in enumerate(rels)} + + #List of dependency types + self.irels = rels + + self.headFlag = options.headFlag + self.rlMostFlag = options.rlMostFlag + self.rlFlag = options.rlFlag + self.kb = options.window_b + self.kl1 = options.window_l1 + self.kl2_r = options.window_l2r + self.kl2_l = options.window_l2l + + self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0) + + #Reading external embedding files, if they exists + + #INFORMATION FOR EXTERNAL WORD EMBEDDINGS + self.external_embedding = None + self.edim = None + self.noextrn = None + self.extrnd = None + self.elookup = None + if options.external_embedding is not None and os.path.exists(options.external_embedding): + self.external_embedding, self.edim,self.noextrn,self.extrnd, self.elookup = self._assign_external_embeddings(options.external_embedding, + self.INDEX_WORD_PAD, self.INDEX_WORD_INITIAL) + else: + warnings.warn("Not using any external file for FORM embeddings") + + #INFORMATION FOR THE EXTERNAL CPOSTAG EMBEDDINGS + self.cpos_external_embedding = None + self.cpos_edim = None + self.cpos_noextrn = None + self.cpos_extrnd = None + self.cpos_elookup = None + if options.cpos_external_embedding is not None and 
os.path.exists(options.cpos_external_embedding): + self.cpos_external_embedding, self.cpos_edim,self.cpos_noextrn,self.cpos_extrnd, self.cpos_elookup = self._assign_external_embeddings(options.cpos_external_embedding, + self.INDEX_POS_PAD, self.INDEX_POS_INITIAL) + else: + warnings.warn("Not using any external file for CPOSTAG embeddings") + + #INFORMATION FOR THE EXTERNAL POSTAG EMBEDDINGS + self.pos_external_embedding = None + self.pos_edim = None + self.pos_noextrn = None + self.pos_extrnd = None + self.pos_elookup= None + if options.pos_external_embedding is not None and os.path.exists(options.pos_external_embedding): + self.pos_external_embedding, self.pos_edim,self.pos_noextrn,self.pos_extrnd, self.pos_elookup = self._assign_external_embeddings(options.pos_external_embedding, + self.INDEX_POS_PAD, self.INDEX_POS_INITIAL) + else: + warnings.warn("Not using any external file for POSTAG embeddings") + + #INFORMATION FOR THE EXTERNAL FEATS EMBEDDINGS + self.feats_external_embedding = None + self.feats_edim = None + self.feats_noextrn = None + self.feats_extrnd = None + self.feats_elookup= None + + if options.feats_external_embedding is not None and os.path.exists(options.feats_external_embedding): + self.feats_external_embedding, self.feats_edim,self.feats_noextrn,self.feats_extrnd, self.feats_elookup = self._assign_external_embeddings(options.feats_external_embedding, self.INDEX_FEATS_PAD, self.INDEX_FEATS_INITIAL) + else: + warnings.warn("Not using any external file for FEATS embeddings") + + + #INFORMATION FOR THE EXTERNAL FEATS EMBEDDINGS +# self.lemmas_external_embedding = None +# self.lemmas_edim = None +# self.lemmas_noextrn = None +# self.lemmas_extrnd = None +# self.lemmas_elookup= None + +# if options.lemmas_external_embedding is not None and os.path.exists(options.lemmas_external_embedding): +# self.lemmas_external_embedding, self.lemmas_edim,self.lemmas_noextrn,self.lemmas_extrnd, self.lemmas_elookup = self._assign_external_embeddings(options.lemmas_external_embedding, self.INDEX_FEATS_PAD, self.INDEX_FEATS_INITIAL) +# else: +# warnings.warn("Not using any external file for LEMMAS embeddings") + + + + + self.oov_external_embedding = None + self.oov_edim = None + self.oov_noextrn = None + self.oov_extrnd = None + self.oov_elookup = None + + + if path_oov_external_embedding is not None and os.path.exists(options.feats_external_embedding): + self.oov_external_embedding, self.oov_edim,self.oov_noextrn,self.oov_extrnd, self.oov_elookup = self._assign_external_embeddings(path_oov_external_embedding, + self.INDEX_WORD_PAD, self.INDEX_WORD_INITIAL) + + if self.oov_external_embedding is not None and self.oov_edim != self.edim: + raise ValueError("The dimensions of the embeddings for OOV words is not equal to the dimension of the rest of external word embeddings (self.oov_edim != self.edim)") + + #Obtaining the dimension of the input + dims = (self.wdims + self.pdims + (self.edim if self.external_embedding is not None else 0) + + (self.cpos_edim if self.cpos_external_embedding is not None else 0) + + (self.pos_edim if self.pos_external_embedding is not None else 0)+ + (self.feats_edim if self.feats_external_embedding is not None else 0) +# + +# (self.lemmas_edim if self.lemmas_external_embedding is not None else 0) + ) + + + #Initialization of the architecture + + self.blstmFlag = options.blstmFlag + self.bibiFlag = options.bibiFlag + + if self.bibiFlag: + self.surfaceBuilders = [VanillaLSTMBuilder(1, dims, self.ldims * 0.5, self.model), + VanillaLSTMBuilder(1, dims, self.ldims * 0.5, 
self.model)] + self.bsurfaceBuilders = [VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model), + VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model)] + elif self.blstmFlag: + if self.layers > 0: + self.surfaceBuilders = [VanillaLSTMBuilder(self.layers, dims, self.ldims * 0.5, self.model), LSTMBuilder(self.layers, dims, self.ldims * 0.5, self.model)] + else: + self.surfaceBuilders = [SimpleRNNBuilder(1, dims, self.ldims * 0.5, self.model), LSTMBuilder(1, dims, self.ldims * 0.5, self.model)] + + + self.hidden_units = options.hidden_units + self.hidden2_units = options.hidden2_units + self.vocab['*PAD*'] = self.INDEX_WORD_PAD + self.cpos['*PAD*'] = self.INDEX_POS_PAD + self.feats['*PAD*'] = self.INDEX_FEATS_PAD + + self.vocab['*INITIAL*'] = self.INDEX_WORD_INITIAL + self.cpos['*INITIAL*'] = self.INDEX_POS_INITIAL + self.feats['*INITIAL*'] = self.INDEX_FEATS_INITIAL + + self.wlookup = self.model.add_lookup_parameters((len(words) + self.INIT_WORD_INDEX, self.wdims)) + self.plookup = self.model.add_lookup_parameters((len(cpos) + self.INIT_POS_INDEX, self.pdims)) + self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims)) + + + self.word2lstm = self.model.add_parameters((self.ldims, dims)) + + self.word2lstmbias = self.model.add_parameters((self.ldims)) + self.lstm2lstm = self.model.add_parameters((self.ldims, self.ldims * self.nnvecs + self.rdims)) + self.lstm2lstmbias = self.model.add_parameters((self.ldims)) + + self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * self.nnvecs * (self.kl1 + self.kl2_l + self.kl2_r + self.kb))) + self.hidBias = self.model.add_parameters((self.hidden_units)) + + self.hid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units)) + self.hid2Bias = self.model.add_parameters((self.hidden2_units)) + + self.outLayer = self.model.add_parameters((self.SIZE_TRANSITIONS, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units)) + self.outBias = self.model.add_parameters((self.SIZE_TRANSITIONS)) + + self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * self.nnvecs * (self.kl1 + self.kl2_l + self.kl2_r + self.kb))) + self.rhidBias = self.model.add_parameters((self.hidden_units)) + + self.rhid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units)) + self.rhid2Bias = self.model.add_parameters((self.hidden2_units)) + + self.routLayer = self.model.add_parameters((2 * (len(self.irels) + 0) + 1, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units)) + self.routBias = self.model.add_parameters((2 * (len(self.irels) + 0) + 1)) + + self.pretrained = pretrained + + + def _assign_external_embeddings(self,option_external_embedding, + index_pad,index_initial): + """ + Reads an external embedding file + Returns: + external_embedding: A dictionary of key:embedding + edim: Dimension of the embedding + noextrn: ?? 
+ extrnd: Index for each key + elookup: Parameter lookup + """ + + + if option_external_embedding is not None: + + external_embedding_fp = open(option_external_embedding,'r') + external_embedding_fp.readline() + + external_embedding = {line.split(' ')[0] : [float(f) for f in line.strip().split(' ')[1:]] + for line in external_embedding_fp} + + + external_embedding_fp.close() + + edim = len(external_embedding.values()[0]) + noextrn = [0.0 for _ in xrange(edim)] + extrnd = {element: i + self.INIT_POS_INDEX + for i, element in enumerate(external_embedding)} + elookup = self.model.add_lookup_parameters((len(external_embedding) + self.INIT_WORD_INDEX, edim)) + + for element, i in extrnd.iteritems(): + elookup.init_row(i, external_embedding[element]) + extrnd['*PAD*'] = index_pad + extrnd['*INITIAL*'] = index_initial + + return external_embedding, edim, noextrn, extrnd, elookup + + return None,None,None,None,None + + + + def __evaluate(self, c, train): + """ + @param c: A CovingtonConfiguration instance + @param train: True if used in the training phase, False otherwise + Returns the scores for all possible transitions (training) + or the top ones (testing) for a given configuration c + """ + + #Gets the embeddings for the terms to be used in the prediction + top_l1 = [c.sentence[c.l1-i].lstms if c.l1 - i > 0 else [self.empty] for i in xrange(self.kl1)] + top_l2l = [c.sentence[c.l1+1+i].lstms if c.l1+1+i < c.b else [self.empty] for i in xrange(self.kl2_l)] + top_l2r = [c.sentence[c.b-i].lstms if c.b-i > c.l1 else [self.empty] for i in xrange(self.kl2_r)] + topBuffer = [c.sentence[c.b+i-1].lstms if c.b+i-1 <= c.sentence[-1].id else [self.empty] for i in xrange(self.kb)] + + input = concatenate(list(chain(*(top_l1 + top_l2l + top_l2r + topBuffer)))) + + if self.hidden2_units > 0: + routput = (self.routLayer.expr() * self.activation(self.rhid2Bias.expr() + self.rhid2Layer.expr() * self.activation(self.rhidLayer.expr() * input + self.rhidBias.expr())) + self.routBias.expr()) + else: + routput = (self.routLayer.expr() * self.activation(self.rhidLayer.expr() * input + self.rhidBias.expr()) + self.routBias.expr()) + + if self.hidden2_units > 0: + output = (self.outLayer.expr() * self.activation(self.hid2Bias.expr() + self.hid2Layer.expr() * self.activation(self.hidLayer.expr() * input + self.hidBias.expr())) + self.outBias.expr()) + else: + output = (self.outLayer.expr() * self.activation(self.hidLayer.expr() * input + self.hidBias.expr()) + self.outBias.expr()) + + scrs, uscrs = routput.value(), output.value() + + if train: + left_arc_info = [(rel,self.LEFT_ARC, scrs[1+j*2] + uscrs[self.LEFT_ARC], routput[1+j*2]+ output[self.LEFT_ARC]) + for j, rel in enumerate(self.irels) if c.l1 > 0 and c.l1 < c.b and c.b <= c.sentence[-1].id] + + right_arc_info = [(rel,self.RIGHT_ARC, scrs[2+j*2] + uscrs[self.RIGHT_ARC], routput[2+j*2]+ output[self.RIGHT_ARC]) + for j, rel in enumerate(self.irels) if c.l1 >= 0 and c.l1 < c.b and c.b <= c.sentence[-1].id] + + shift_info = [ (None, self.SHIFT, scrs[0] + uscrs[self.SHIFT], routput[0] + output[self.SHIFT]) ] if c.b <= c.sentence[-1].id else [] + + no_arc_info = [(None, self.NO_ARC,scrs[3] + uscrs[self.NO_ARC], routput[3] + output[self.NO_ARC] )] if c.l1> 0 and c.b <= c.sentence[-1].id else [] + + ret = [left_arc_info,right_arc_info, shift_info, no_arc_info] + + else: + #It is done different from the 'train' phase, due to the dynamic oracle. 
+ #In the test phase we already pick the most likely transition/dependency instead of returning them all + #and then selecting one according to the prediction of the dynamic oracle + sLEFT,rLEFT = max(zip(scrs[1::2],self.irels)) + sRIGHT,rRIGHT = max(zip(scrs[2::2],self.irels)) + sLEFT += uscrs[self.LEFT_ARC] + sRIGHT += uscrs[self.RIGHT_ARC] + ret = [ [(rLEFT, self.LEFT_ARC, sLEFT) ] if (c.l1 > 0 and c.l1 < c.b and c.b <= c.sentence[-1].id and self._is_valid_left_arc(c)) else [], + [(rRIGHT, self.RIGHT_ARC, sRIGHT) ] if (c.l1 >= 0 and c.l1 < c.b and c.b <= c.sentence[-1].id and self._is_valid_right_arc(c)) else [], + [(None, self.SHIFT, scrs[0] + uscrs[self.SHIFT]) ] if (c.b <= c.sentence[-1].id) else [], + [(None, self.NO_ARC,scrs[3] + uscrs[self.NO_ARC]) ] if (c.l1 > 0 and c.b <= c.sentence[-1].id) else [] + ] + return ret + + + def Save(self, filename): + self.model.save(filename) + + + def Load(self, filename): + self.model.load(filename) + + def Init(self): + evec = self.elookup[1] if self.external_embedding is not None else None + cpos_evec = self.cpos_elookup[1] if self.cpos_external_embedding is not None else None + pos_evec = self.pos_elookup[1] if self.pos_external_embedding is not None else None + feats_evec = self.feats_elookup[1] if self.feats_external_embedding is not None else None + # lemmas_evec = self.lemmas_elookup[1] if self.lemmas_external_embedding is not None else None + paddingWordVec = self.wlookup[1] + paddingPosVec = self.plookup[1] if self.pdims > 0 else None + # paddingVec = tanh(self.word2lstm.expr() * concatenate(filter(None, [paddingWordVec, paddingPosVec, evec, cpos_evec, pos_evec, feats_evec, lemmas_evec])) + self.word2lstmbias.expr()) + paddingVec = tanh(self.word2lstm.expr() * concatenate(filter(None, [paddingWordVec, paddingPosVec, evec, cpos_evec, pos_evec, feats_evec])) + self.word2lstmbias.expr()) + self.empty = paddingVec if self.nnvecs == 1 else concatenate([paddingVec for _ in xrange(self.nnvecs)]) + + + def getWordEmbeddings(self, sentence, train): + """ + Gets the embeddings (also external) for every term in a sentence + Returns a vector of all embeddings concatenated + """ + + for root in sentence: + c = float(self.wordsCount.get(root.norm, 0)) + dropFlag = not train or (random.random() < (c/(0.25+c))) + sys.stdout.flush() + root.wordvec = self.wlookup[int(self.vocab.get(root.norm, 0)) if dropFlag else 0] + root.cposvec = self.plookup[int(self.cpos.get(root.cpos,0))] if self.pdims > 0 else None + + #For word embeddings + if self.external_embedding is not None: + if root.form in self.external_embedding: + root.evec = self.elookup[self.extrnd[root.form]] + elif root.norm in self.external_embedding: + root.evec = self.elookup[self.extrnd[root.norm]] + else: + if (self.oov_external_embedding is not None and root.form.replace(" ","_") in self.oov_external_embedding): + root.evec = self.oov_elookup[self.oov_extrnd[root.form.replace(" ","_")]] + else: + root.evec = self.elookup[0] + else: + root.evec = None + + #For cpostag embeddings + if self.cpos_external_embedding is not None: + if root.cpos in self.cpos_external_embedding: + root.cposevec = self.cpos_elookup[self.cpos_extrnd[root.cpos]] + else: + root.cposevec = self.cpos_elookup[0] + else: + root.cposevec = None + + #For postag embeddings + if self.pos_external_embedding is not None: + if root.pos in self.pos_external_embedding: + root.posevec = self.pos_elookup[self.pos_extrnd[root.pos]] + else: + root.posevec = self.pos_elookup[0] + else: + root.posevec = None +# + #For feats embeddings + if 
self.feats_external_embedding is not None: + if root.feats in self.feats_external_embedding: + root.featsevec = self.feats_elookup[self.feats_extrnd[root.feats]] + else: + root.featsevec = self.feats_elookup[0] + else: + root.featsevec = None + + + #For lemmas embeddings +# if self.lemmas_external_embedding is not None: +# if root.lemma in self.lemmas_external_embedding: +# root.lemmasevec = self.lemmas_elookup[self.lemmas_extrnd[root.lemma]] +# else: +# root.lemmasevec = self.lemmas_elookup[0] +# else: +# root.lemmasevec = None + + + # root.ivec = concatenate(filter(None, [root.wordvec, root.cposvec, root.evec, root.cposevec, root.posevec, root.featsevec, root.lemmasevec])) + root.ivec = concatenate(filter(None, [root.wordvec, root.cposvec, root.evec, root.cposevec, root.posevec, root.featsevec])) + + if self.blstmFlag: + forward = self.surfaceBuilders[0].initial_state() + backward = self.surfaceBuilders[1].initial_state() + + for froot, rroot in zip(sentence, reversed(sentence)): + forward = forward.add_input( froot.ivec ) + backward = backward.add_input( rroot.ivec ) + froot.fvec = forward.output() + rroot.bvec = backward.output() + for root in sentence: + root.vec = concatenate( [root.fvec, root.bvec] ) + + if self.bibiFlag: + bforward = self.bsurfaceBuilders[0].initial_state() + bbackward = self.bsurfaceBuilders[1].initial_state() + + for froot, rroot in zip(sentence, reversed(sentence)): + bforward = bforward.add_input( froot.vec ) + bbackward = bbackward.add_input( rroot.vec ) + froot.bfvec = bforward.output() + rroot.bbvec = bbackward.output() + for root in sentence: + root.vec = concatenate( [root.bfvec, root.bbvec] ) + + else: + for root in sentence: + root.ivec = (self.word2lstm.expr() * root.ivec) + self.word2lstmbias.expr() + root.vec = tanh( root.ivec ) + + + def Predict(self, conll_path): + """ + Makes non-projective depending parsing prediction given a ConLL-X file + """ + + + with open(conll_path, 'r') as conllFP: + for iSentence, sentence in enumerate(read_conll(conllFP)): + self.Init() + + l1 = sentence[0].id + b = sentence[1].id + arcs = set([]) + + self.getWordEmbeddings(sentence, False) + + for root in sentence: + root.lstms = [root.vec for _ in xrange(self.nnvecs)] + + hoffset = 1 if self.headFlag else 0 + + c = CovingtonConfiguration(l1,b,sentence,arcs) + while not self._is_final_state(b,sentence): + + transition_scores = self.__evaluate(c, False) + + + best = max(chain(*transition_scores), key = itemgetter(2) ) + + if best[1] == self.LEFT_ARC: + + sentence[l1].pred_parent_id = sentence[b].id + sentence[l1].pred_relation = best[0] + best_op = self.LEFT_ARC + if self.rlMostFlag: + sentence[b].lstms[best_op+hoffset] = sentence[l1].lstms[best_op+hoffset] + if self.rlFlag: + sentence[b].lstms[best_op+hoffset] = sentence[l1].vec + + arcs.add((b,l1)) + l1 = l1 -1 + + elif best[1] == self.RIGHT_ARC: + + sentence[b].pred_parent_id = sentence[l1].id + sentence[b].pred_relation = best[0] + + best_op = self.RIGHT_ARC + if self.rlMostFlag: + sentence[l1].lstms[best_op+hoffset] = sentence[b].lstms[best_op+hoffset] + if self.rlFlag: + sentence[l1].lstms[best_op+hoffset] = sentence[b].vec + + arcs.add((l1,b)) + l1 = l1-1 + + elif best[1] == self.SHIFT: + l1 = b + b = b + 1 + + + elif best[1] == self.NO_ARC: + l1 = l1 - 1 + + c = CovingtonConfiguration(l1,b,sentence,arcs) + renew_cg() + yield sentence + + + def Train(self, conll_path): + """ + Trains a O(n^2) Covington's parser with a O(n^2) dynamic oracle + """ + mloss = 0.0 + errors = 0 + batch = 0 + eloss = 0.0 + eerrors = 0 + 
lerrors = 0 + etotal = 0 + ltotal = 0 + ninf = -float('inf') + + hoffset = 1 if self.headFlag else 0 + + start = time.time() + + with open(conll_path, 'r') as conllFP: + shuffledData = list(read_conll(conllFP)) + + random.shuffle(shuffledData) + + + errs = [] + eeloss = 0.0 + + self.Init() + + for iSentence, sentence in enumerate(shuffledData): + if iSentence % 100 == 0 and iSentence != 0: + print 'Processing sentence number:', iSentence, 'Loss:', eloss / etotal, 'Errors:', (float(eerrors)) / etotal, 'Labeled Errors:', (float(lerrors) / etotal) , 'Time', time.time()-start + start = time.time() + eerrors = 0 + eloss = 0.0 + etotal = 0 + lerrors = 0 + ltotal = 0 + + self.getWordEmbeddings(sentence, True) + #We obtain the gold arcs to then compute the dynamic oracle for covington + gold_arcs = set([]) + for word in sentence: + + #TODO: Weird error if not, adds and arc (0,0) + if word.id != word.parent_id: + gold_arcs.add((word.parent_id,word.id)) + + + l1 = sentence[0].id + b = sentence[1].id + arcs = set([]) + c = CovingtonConfiguration(l1,b,sentence,arcs) + loss_c = self._loss(c,gold_arcs, iSentence) + + for word in sentence: + word.lstms = [word.vec for _ in xrange(self.nnvecs)] + + hoffset = 1 if self.headFlag else 0 + + while not self._is_final_state(b,sentence): + + costs = [None,None,None,None] + transition_scores = self.__evaluate(c, True) + + #We determine if the transitions are valid for a given configuration c + for t in self.TRANSITIONS: + + l1_aux = l1 + b_aux = b + arcs_aux = set(arcs) + valid_transition = False + + if t == self.LEFT_ARC and self._is_valid_left_arc(c): + arcs_aux.add((b_aux,l1_aux)) + l1_aux = l1_aux -1 + valid_transition = True + + if t == self.RIGHT_ARC and l1 >=0 and self._is_valid_right_arc(c): + arcs_aux.add((l1_aux,b_aux)) + l1_aux = l1_aux-1 + valid_transition = True + + if t == self.NO_ARC and l1 >0: + l1_aux = l1_aux-1 + valid_transition = True + + if t == self.SHIFT: + l1_aux = b_aux + b_aux = b_aux + 1 + valid_transition = True + + if valid_transition: + + new_c = CovingtonConfiguration(l1_aux,b_aux,sentence,arcs_aux) + loss_new_c = self._loss(new_c,gold_arcs,iSentence) + + cost = loss_new_c - loss_c + costs[t] = float(cost) + + #Valid transitions are those with cost 0 + #If it is a LEFT/RIGHT arc, also the relation must match with the one in gold standard + valid_transitions = [s for s in chain(*transition_scores) if costs[s[1]] == 0 and (s[1] in [self.SHIFT,self.NO_ARC] + or ((s[1] == self.LEFT_ARC and s[0] == sentence[l1].relation) + or (s[1] == self.RIGHT_ARC and s[0] == sentence[b].relation)))] + + best_valid = max(valid_transitions, key=itemgetter(2)) + + wrong_transitions = [s for s in chain(*transition_scores) if costs[s[1]] is not None and ( (costs[s[1]] != 0) or (s[1] in [self.LEFT_ARC,self.RIGHT_ARC] + and ((s[1] == self.LEFT_ARC and s[0] != sentence[l1].relation) + or (s[1] == self.RIGHT_ARC and s[0] != sentence[b].relation))) ) ] + + #Aggressive exploration as done by Kiperwasser and Golberg (2016) + if wrong_transitions != []: + best_wrong = max(wrong_transitions, key=itemgetter(2)) + + best = best_valid if ( (not self.oracle) or (best_valid[2] - best_wrong[2] > 1.0) + or (best_valid[2] > best_wrong[2] and random.random() > 0.1) ) else best_wrong + else: + best = best_valid + + + #Moving a new configuration based on the "best" choice + if best[1] == self.LEFT_ARC: + + sentence[l1].pred_parent_id = sentence[b].id + sentence[l1].pred_relation = best[0] + + best_op = self.LEFT_ARC + if self.rlMostFlag: + sentence[b].lstms[best_op+hoffset] = 
sentence[l1].lstms[best_op+hoffset] + if self.rlFlag: + sentence[b].lstms[best_op+hoffset] = sentence[l1].vec + + child = sentence[l1] + arcs.add((b,l1)) + l1 = l1 -1 + + elif best[1] == self.RIGHT_ARC: + + + sentence[b].pred_parent_id = sentence[l1].id + sentence[b].pred_relation = best[0] + + best_op = self.RIGHT_ARC + if self.rlMostFlag: + sentence[l1].lstms[best_op+hoffset] = sentence[b].lstms[best_op+hoffset] + if self.rlFlag: + sentence[l1].lstms[best_op+hoffset] = sentence[b].vec + + arcs.add((l1,b)) + child = sentence[b] + l1 = l1-1 + + + elif best[1] == self.SHIFT: + l1 = b + child = sentence[b] + b = b + 1 + + + elif best[1] == self.NO_ARC: + l1 = l1 - 1 + child = sentence[l1] + + + if best_valid[2] < best_wrong[2] + 1.0: + loss = best_wrong[3] - best_valid[3] + mloss += 1.0 + best_wrong[2] - best_valid[2] + eloss += 1.0 + best_wrong[2] - best_valid[2] + errs.append(loss) + + + if best[1] not in [self.SHIFT, self.NO_ARC] and (child.pred_parent_id != child.parent_id or child.pred_relation != child.relation): + lerrors += 1 + if child.pred_parent_id != child.parent_id: + errors += 1 + eerrors += 1 + + etotal += 1 + c = CovingtonConfiguration(l1,b,sentence,arcs) + loss_c = self._loss(c,gold_arcs, iSentence) + + + if len(errs) > 50: + eerrs = esum(errs) + scalar_loss = eerrs.scalar_value() + eerrs.backward() + self.trainer.update() + errs = [] + lerrs = [] + + renew_cg() + self.Init() + + if len(errs) > 0: + eerrs = (esum(errs)) # * (1.0/(float(len(errs)))) + eerrs.scalar_value() + eerrs.backward() + self.trainer.update() + + errs = [] + lerrs = [] + + renew_cg() + + self.trainer.update_epoch() + print "Loss: ", mloss/iSentence + + + def _is_final_state(self,b,sentence): + return b >= len(sentence) + + + def _is_valid_left_arc(self,c): + + aux = set(c.A) + aux.add((c.b,c.l1)) + l1_has_head = self._y_has_head(c.A, c.b, c.l1) + return (c.l1 > 0 and not l1_has_head + and self._count_cycles(aux) == 0) + + + def _is_valid_right_arc(self,c): + + b_has_head = self._y_has_head(c.A, c.l1, c.b) + aux = set(c.A) + aux.add((c.l1,c.b)) + return ((not b_has_head) and self._count_cycles(aux) == 0) + + + """ + Gomez-Rodriguez & Fernandez-Gonzalez: + An Efficiente Dynamic Oracle for Unrestricted Non-Projective Parsing (ACL,2015) + Algorithm 1 + """ + def _loss(self, c, gold_arcs, iSentence): + + U = set([]) #set of unreachable nodes + non_built_arcs = gold_arcs.difference(c.A) + + + i = c.l1 + j = c.b + + for x,y in non_built_arcs: + left = min(x,y) #O(n) + right = max(x,y) #O(n) + if (j > right or (j==right and i < left) or self._y_has_head(c.A,x,y) + or self._weakly_connected(c.A, x, y,c, gold_arcs)): + U.add((x,y)) + + I = gold_arcs.difference(U) + + return len(U) + self._count_cycles( c.A.union(I)) + + + #TODO: This can be done more efficient + #O(n^2) + def _weakly_connected(self,A,x,y,c, gold_arcs): + + weakly_connected = False + end_path = False + parent = x + + while parent != 0 and not weakly_connected and not end_path and A != set([]): + if (parent,y) in A: + weakly_connected = True + break + else: + + for (a,b) in A: + if b == parent: + parent = a + break + else: + end_path = True + + + return weakly_connected + + + """ + Tarjan (1972) implementation at https://github.com/bwesterb/py-tarjan/ + O(n) + """ + def _count_cycles(self, A): + + d = {} + for a,b in A: + if a not in d: + d[a] = [b] + else: + d[a].append(b) + + return sum([1 for e in tarjan(d) if len(e) > 1]) + + + """ + Determines if node y has already a head + """ + #O(n) + def _y_has_head(self,A,x,y): + + for z,y_prime in A: + if 
y_prime == y and z != x: + return True + return False + + #O(n) +# def violates_single_root(self, A): +# print A,[1 for (h,d) in A if h==0], len([1 for (h,d) in A if h==0]) != 0 +# return len([1 for (h,d) in A if h==0]) != 0 + diff --git a/bcovington/src/covington.pyc b/bcovington/src/covington.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c52ffdf5bb5b35491481800b0996b6ade7e7316 GIT binary patch literal 24143 zcmcJ1eQ;dYb>Drv_$I&)d=LOXmtP`CNu)?hqQy|6D1w9_ks$Q|k`gRvwb*?C7F_HC z?>&&94cM_MJC2@NrXOi%(n+Vz zq%)q*xWC`;zW4Sm04hpF6WqJ^+a4(Hs9iOsI`R!xs7fw8@+?^FFw!S@@l*LZ!#+iARhGmCi#j5lb! z0rLP@?J}jk#@k~`UFIF2x7U=mlJk@MOlg}k`%P)PGC5P)q09kO>Q?5UDfK9G$dr1O zIc!RO${aDJoyt6BO8v?lHKhS%hD>QtnPaB3OPS-Qv|E|SO=*uZPZ)Q6Xdk&wmWGAu zT-Zv!mS-1g<&r9Wud*C+o!ELNq$C8hrLYa(3$+EW8ZIxWPoY$p^$WHE`I1)&3mO20 zg)+scoMFNN%J`UKCz7t{qhX(cNQIKdFBp?|%xb%t&lu$Av*u35_-BYFiw;>zm}XP` z^%Pi(z$Oh4S`YS_6j+gA@0gXG!aT3Im#X&@<|p{Lfh!!gCEf5O z%cS~*C_1V9N#&nX{wd{8DSt}&)5_b0d|LTuRR65?9O^P(mb5GsXjWKv7&b%6D zuEv>bno^KN?k^jW8|84QtRpwdDd)$OUQZ#X`ur>su{e`EY$b zhqVH7&Pv!p(_hDVc|CYE%-zO%@x5`FX9V$_rR}W!I%E2K>to+rKSnh6N@KlU!E%bW z`|AOsj-P0(|D=lZvAq68J4u~4Rc+i5l{zM?>pMmRt~J*Ge^L3V^@mr)aT1y;82W7?u~xpYZ%#ImlIOVXknuP_7iFE1rz;A}w-$XxfkC6YN0(>l+>) z{;%Qzh9wS-rxeqU%VQfGq7m=55q2w*;=OX<LH%cT={frjq|Cuhsy$?0+sKDk)%1J6G>U5vfQlS%Hx;(fUq=PQMU=~Cf^w=h<~ zq2)}72)YhU`1IftJjN#BKDEcZnK5gQSpyN)i!i1i8sWW+S)!v0NMjMEt2sJ0I+4G9 z-5t)4jtyUa-Fm#F^~6*1V(!)alAND%wPbs7&cLh8JQ5-UddKHWlsqli zjqBs7^%7|!NJ~~+tOY+_%COBnTFTsA7_q)gmKF;5kDZj-qb4QXjuEv;S1w)!%60cz&RdxFN+kdV!RZgt=MtWwiHjrYZl^v( zw~f7?`VhS&3hAl-COS>bdAU*34i4h&%v)Lp>0wR4 zN%y!n_Lq?Pgh%X~9&?DQ`$VmFH}9cy?!faoRj_bu7D(aWf4UHGb3A_I;_&gHojHp0 z=*6qYhdzgrbm+Aj16>4LP>x{9&MPf+arojj#Y!(C4mRN<*6H~Qn1xZEkN5b@egaN~qzjdE%R^t{q#->>;0qp^OWTBAc_AV8xgX z6_HS_-t~$W!LeG^lk~-pQ$Asz2;ftrHLunH;aAmPxss~AO09Nhc~Mw`63WkFA)JfA zAU;7z-sB4bBG1+Q##sUhwk$Jiq~VzqLaD0g;gnF`EBIAvKuyI$NHd29!6t)?YqpJa zFf6T7ObjI=?&?=db%2;G2C*$7v`2{Aj)s~TGj13UL4k{@CSt$|S%?wf`bGUXf7~2T zPrFURv>9#e8#QuhqH2rr;3PNc&Ee}H@dVaBhE<%mTRADD(y%ng+q+a_NE)S8qs=KL zDWox0xFUju7!BeX^^0j#F_IiM-zrp=J*%3i7N+@>CQrh2dAh6umE^5LwOpwbcxLp{ za=B81#w88GvOiNOdQnv%jXS-*lH5&qq#0w?vO6{wk4C1=l}jbBnqRJ#A--gqtvDUk z(9ABBN_k5_9zITC!GpF4^jKEBs!Vb{9``Jaj6YQ@MpRR!`ca-XFULy_L@rPtohB!Q zLQ2=_(bS<^=^W#T}79ZCfI+(`phsMkg=lC)|r;<5=_SW8-?{ zYRh4LAQX~;e2i2dh$S^~MOla@AuTG_`z0e10Y4s+{T6-C?w+L~GG3mG)S9%jdxdq0 zvUOi(aTJL-fA4hJt^mHtU+&g0JEVZ9l=6`j%CxFWulu(-05umLqhJW%DX;0H+J0hS zK3|3?0_`jSW+v#*Wed|A&&P-RP%jv*hPo^i+ncePmMt|*@{Z9Nhm~GF*_4>{{ z-Q4cPe}~cIOr|Rvm)o5WQIk3BT*!2FCw;O9Fy5KW0cTrt!P(!2Su*X;9_I*fcpNR; zoL4f#prO;*&cBRJA4l8a?7qx^bJ7{e4C8YEpXXBfSDoXTB00^q)H~ZCrx#lwN{>?y@e>jv{`G552SP$27$o<=*Hu$wRVMTZ~)d$!LSWqK=7vn zUTaGf&RXj*EA~ukiw@DIa&j}a7{;7C%sf*32a;RNAP8}m{-wDM-vhzZQj0l{voq#e zkpl>jY-1VyW}3}7&ZX^XeZV+QM=tv3!g2`UAJk3XWUrQ^kOB;s{c3P7so1#fp37Z? zWe!8FRxSAVX;0tr?w?D>tM|k+aV|F;+widwz7a6e_UCdJF4Q~IVkn)CatVkoD)>pTgm#iVJQta=4G6u=Xb@H#Vl$7Z7FG)KgN&qT z|L{~dLq)Jrz<<NC#XnB;Wo^IK>BzoB6{+XCD}eSr@MmN32>! 
z7?QrkkJq<9S*Pw{uG-iTHOLtM*}dX6iei3k5%{&C#5*lq2`y8xQVTp6k1>^kF?VTm zSSMfwzR;}aj-b}fG5IMbjH|iBNHml*xL>=M+Li{v(G02f>~m{R$fMfY{IxNjg4)AQ~m%O7X*CX;{6LcF| zB4fHbGuxbwOpDWz?a1te;-|7dk{NUcxItliD)LF8g-QeL5?(I~(tgX~T$UpmUtpWf(d%F%gP9F?1*RdkLu?`3m-=y|qyJ~5 z_P}{;aO9OH4*B&qjDqad*WK`=Mm}~C;Gsl zh-!Ki?Bnx$#Zg!R%4F88Re+05Pc~EHaBByh_FLijY1s{3W^HTqm_(1rsciJrB(iO5 zARH2ulPs8dQA0?o$!bWVTowTFgTX%b5ID8Y{*u56DDx4S?e=(&?Xv?4*}h2*80tA zuS`$7>}Sz^b!S`wcAi~BDNS&;#TG5u)Spur-@ZfCfx36iYwYD@bAJ^}S zjtw0HuiURQc@2qzhzM%Zatj!(218`IF}7jDsNTY2c;CHAVA?&?E)qIz#-(Ra_X>Q|V2g~=~7`7)DVW%5f*eu2q*On9i_ z{ydYvg+$27j$Z2#pO^i4A3x7t2xTjwFruccoA}0InF#U0ubs}$SOt~ z(T|y$!n_H_wu%9tV6F_}1i_`N)<~!=lPL%XX^wy_RnAwAxCG*?(DIeF1~FPbvahB1 zc|PxPaUn%7fVkis2X1$rDd#345r<(Nmot~5bP#rO*7<6NpPRT#+SkHh$PWA!@b{u~ zGgG8|#-B?>k^TjB8~!Wc0gcMrn+>-ya)6Pj28Fwxy`aIhmXPsmu!_jzBcz(kI*ys{ zkzOKeoR5e%T;oz5Z}&fA$VZItRm||o6uy*0`d*YK@%bgxSU!Whj47kI7>e~=e@ zVFO?RP^CN}UQiJmEzE`p0+tgX@YaDH<82kO57!}Jx&s0sCJXUd74cF+9RlVJAYg`} zis361I%t#8?D$O;VF}d1TWH}EsEfAn11$SCgUB|QWI{#h{w$N4F?F880@X`&{=p){>~O_-etsLF^;@W77&5B5Q43hZK2Z&d z?mtEG5zFf+{Qpl;UKwvU&O!T!u>zZ|q4O;iQ#3jh3@kWov15Cczo0>{gbFM9{Ff$Cw6ZLot8cEVUw94kR9jXf`o}E5KmP{F9Vhg!ZE< zxLi?Ht69NK>=X1HLcAU#2L($!JT)fwx{X17#7OW)2SV-h$LYUt7`y|68hGrs2D%b3 zr}X5snfz%1*kZfMpD|14v$t1{aroh>0}SebM&b`Zq-PASNmFZrgs08YFJ?u;Gsgd} zNO(3)f^`&8RVQJ~hamyarzPQOk$_>g#3a}nFd8rk33%NF00{{nGfO`L2~TV!0e~Rk zV~r%d-gJBEyG_+mvvS4uctMdYtjBX^{+!r^PRDI*=qI8oM&z)B;dzZH#zq|ZV$*Gi zL*yfI6G)td5ibnnWF#jGa5Lc1)RMyv>uf;h6P&gdh5DM06_^2Uy z-8O-o(O$WnT>kqKt%nLHY3XwIIR~6B=V|Bp>^2-H@8u`+q;m}M`F30Lu-w)cairUW zgXV+i(~TqD9*nbple#0>F6S}p2Qbo43#o`t(1dlNF%lou{@kBuaLTswUo&vcle0Ex z!{Mp#XBfuH%07YWut&DmGGTn%p5&IWce4PiS7CW7XluGcP2>7KEEb%it~|@a@YD%f zsXHw)t zp0IQqBx32`ncLwW^MDp}ley(A9Wl4Lk-heMeK(_mPBxQkg*yn7RcqBJ7ya5i)D%=> zuH@lZlHNxS@jzYm#buZ13_c2>6%Ze+UcLI{>+8<*e+`3cgVJs!g@{9RMbgHG_M*}K z8j}$wP!7iZb0+@+iC_aSbhn~j+Dmal>r=_Q%A&#j9m2oLgvwS|su*XQDdQ5dRo>w) zG~|=;rG0I`9wBkR#m>}|E)}@@`$(+E1sZv?3Hx*&;W>F+tU{1TS47>b1iwabPL0jL z{R7tgA(KC1H5Dzod-w9i*&%8m>j6p+@#hLKIxZYicVlC(RIc36u2LS4ECZ|hzrlY0 z9EtQlLQH|%Lc;0bsqb8EW`=yyuCRl(NlY|v!=`paLgZ8M)5zp&i;qy(lY$#oBK{{( z3noxDE!{YI?SOaKg)2DQ@%==mFVpArWx8?ErYF;j2oo>u9K~mU1{&PfL5nxkIW10C zmaT(MtJUPEFgB^Ygir9>*uG$0Lwi##TIHzV2c90*-~mF4$OcjbG?3z647y}#f^LBx zrSidfz*Q>3NV!5&X6Ow*fVmE33AYXV1QcY({M^59gMtJ%A&z()F$7ipRyzMYcCjzx z&Y~NB$pQ^<88A9dXYur{e7TLDra*?LzSPETWrtaV3n`~^Cs5;)?>a0akM~hs2bXL{ zP#(ekq7gpZZKiJoH*7|@cj-jg)^yk!sCO9fg4u$am($5`==QB9&0fYLryIBtx+ZHl#vv=@aek#z4fMwLNBKMh@~I=KQ#`2LG_ntl=yYhg$m;3Ih$`Ud})lt-;H+ zUa?iS5)2V@Dy09=OwxunkcOxNCO=|2gYPu2 z9wp6#tB9W9YXCnegAnnCdWMa^-%Pa$H3oIOO@ZX^vp1A5I5-%sG7 z=%NW5;q2UN2ua}L1+*TNMql!X;eq5Mi!j?>Cq;tg{7h>sYnBAy&`1I+ED3v4B=o9V z1Hy^WHll_F0SV_5cuPV9@gmhCj=nggRs_h2NF`Nu67Ul=^;NJ>{V%mry`iUN4dD^8 z!>XhGYt&bNGk3Q674I%@H>AwTB=|mf4Xgk&&&G;Hvdy*kyG)J=4AN3nJ zi`}1=@MbO8G;y5=*dY3bLn_Xerh_tNHAAcM-)~!e!puXEkZL;%s;a> zPd1seYuN7_q1mSJ>{-|$wA=~RVwT=-`I|_}TWMqv6Ji7L3%-T~XR4h#!l{HG1zHaaz4na2@g8EcY zl$h$P?VC>JeE5k-Hy8|DP74?O62b_U5PNsu$jg}1z9JsH2173KKPLDB8m)>c0=L4^f-z$f6b%4@+oFtZuIs0x_G7jjr|KxngK*vd~oDf$&NeRsXBUU&3 zTG7L80(duZNG^xBqozIoobGR4#Z`lIK`S8jng{Z<P&SLPA6kM$YCqT zxb^ocT;6$?ljMFIEv{Uif6W|S9_#$jnTnjB$W{6l+lZn2F($nngBLbaJ`_1pSfrzA zZ@LoN3-Ci;oyY{m0^ZN(r2-Jx&W?vsiwy*4_=_b*==xD9P+QJ>i?!n15S;}ZMjK!| z5nCYP>gn-~LVFQlx)2uU)QtCw&`dsiQ9JXMD9;JKuomJLEVC7xC8~I}YsTFZPlBkS zAG(7vPSKK;h6~Z}k72k72hHd10;JqMd4RLEN{N>V0=>@bCLW->n_%C;Y<6~Fnztu? 
zLcNIB*aCMiC&kCuYY&xE`NHyw+NYJHQ_8Vb;7OjnsZn-~IIrTKaSYWCkGB(^ zYqyg_3=$qNUYY5OePH&?ZpXcg7WT;OL;SMCY0n&wzIUs%4XvzgkVdqyYRO?`y$HS_-xN~ z0MkL>%~xaqLy9u3PCsyF88yTk6nEJZU+NQnF5#ndVK~clPi0HM9f|LRkdJz9}TK63rl3ZohVB<#Gl$b);yRmx%npK?if{(c_!;_`gk zRzWBHlqKHm%@^``KFe}qaJsBgdC-oZzD$;wlcUiG>FLkP8*A9 z;OyIICiO9M$uTfjY$eC=9t*;{VkVtoFc1fDmc21pY=Yt{KrJ7$Qn8wcv2q;i?Q(-m zX-RE>+U+LLu%2TiV>)4!dy^ojdNju#E_cEmGkn1I}A#b2xzAsHts-cR9|h3*e}RKi2(+)@tyui-t0t2E;REEJN2|79J&-pQ`9=6U zKaF>Ebwx4IO+@$K5}2181!sRjfY$^yvTg^eVe}Eo(ovwckFrE;4fd06z8Z;G={EeG z$5Xf~vn^Ak9YVR{w`A*=k%4SMbeLjsAscOG<yEE%t9Ofe{mA#W`8nH3!4GrBe;L2Rew;dBSm0A=TYp5lo4ZY-75rPUyF8@+C0 z$_7;k%8a!p^}rqhwL>JtYBxR8Arn;D|oMX9gG2QAFkWC4>l+8>DL0)8wG;Hi*NDVOh|MhiQ0fAN2;~)jG|y z8)PyuPVt^afc< zW1lP~8@I#ElIkQf$p(HFg+>X4c7z_pfBTEPka#In3Nqd{!VN%P$pHzms^Pl06gNrV zP$Q{&#wBwi`V(>m?&lYMxMYt|hN!vTCc$k$#^9MAaN3-)p(jz0Fz55UAe+z21kdO3 zLoUk|6y2wYJ6%ZkX(rDwIl|;9lVeOCXL689fyp!zNRDyuFzIDN!FBI4xyNKLliz3Z z2TcBu$saM{o05_T_ZL}|Qz-xM@3Z(e6Z(kme_`&wG12iI{ZTi^>g5Bs74sazCvcIp&_?H9{GHChmThe7&UWH1bmw>5kF`J7-qfCL z?`ofJzuf*O$hRZk-@dzjyG56WP}2SgK}2`Gm&0-;-~(e^2^kjHE+^jW#ILQ?eEGog z@%SSxVXX#mst5bz9U@as#KR&8fghUVIMqdWiae9$`~(V!Z literal 0 HcmV?d00001 diff --git a/bcovington/src/parser.py b/bcovington/src/parser.py new file mode 100644 index 0000000..45a5071 --- /dev/null +++ b/bcovington/src/parser.py @@ -0,0 +1,175 @@ +from argparse import ArgumentParser +import utils +import covington +import os +import pickle +import time +import tempfile +import yaml +import codecs +import sys +import warnings +""" +Main file +""" + + + +if __name__ == '__main__': + + parser = ArgumentParser() + parser.add_argument("--input", dest="input", help="Path to the input file",default=None) + parser.add_argument("--input_type", dest="input_type",help="Style of the input file [raw|conllu] (only use with --predict)") + parser.add_argument("--pipe", dest="pipe",default="UDpipe",help="Framework used to do the pipeline. 
Only \"UDpipe\" supported (only use with --predict)") + parser.add_argument("--train", dest="conll_train", help="Annotated CONLL train file", metavar="FILE", default="../data/PTB_SD_3_3_0/train.conll") + parser.add_argument("--dev", dest="conll_dev", help="Annotated CONLL dev file", metavar="FILE", default="../data/PTB_SD_3_3_0/dev.conll") + parser.add_argument("--test", dest="conll_test", help="Annotated CONLL test file", metavar="FILE", default="../data/PTB_SD_3_3_0/test.conll") + parser.add_argument("--params", dest="params", help="Parameters file", metavar="FILE", default="params.pickle") + parser.add_argument("--extrn", dest="external_embedding", help="External embeddings", metavar="FILE") + parser.add_argument("--extrn_cpos", dest="cpos_external_embedding",help="CPoStag external embeddings", metavar="FILE") + parser.add_argument("--extrn_pos", dest="pos_external_embedding", help= "PoStag external embeddings", metavar="FILE") + parser.add_argument("--extrn_feats", dest="feats_external_embedding", help="Feats external embeddings", metavar="FILE") + parser.add_argument("--model", dest="model", help="Load/Save model file", metavar="FILE", default="bcovington.model") + parser.add_argument("--wembedding", type=int, dest="wembedding_dims", default=100) + parser.add_argument("--pembedding", type=int, dest="pembedding_dims", default=25) + parser.add_argument("--rembedding", type=int, dest="rembedding_dims", default=25) + parser.add_argument("--epochs", type=int, dest="epochs", default=30) + parser.add_argument("--hidden", type=int, dest="hidden_units", default=100) + parser.add_argument("--hidden2", type=int, dest="hidden2_units", default=0) + parser.add_argument("--kb", type=int, dest="window_b", default=1) + parser.add_argument("--k1", type=int, dest="window_l1", default=3) + parser.add_argument("--k2r", type=int, dest="window_l2r", default = 1) + parser.add_argument("--k2l", type=int, dest="window_l2l", default = 1) + parser.add_argument("--lr", type=float, dest="learning_rate", default=0.1) + parser.add_argument("--outdir", type=str, dest="output", default="results") + parser.add_argument("--activation", type=str, dest="activation", default="tanh") + parser.add_argument("--optimizer",type=str, dest="optimizer", default="adam") + parser.add_argument("--lstmlayers", type=int, dest="lstm_layers", default=2) + parser.add_argument("--lstmdims", type=int, dest="lstm_dims", default=125) + parser.add_argument("--dynet-seed", type=int, dest="seed", default=7) + parser.add_argument("--disableoracle", action="store_false", dest="oracle", default=True) + parser.add_argument("--disableblstm", action="store_false", dest="blstmFlag", default=True) + parser.add_argument("--bibi-lstm", action="store_true", dest="bibiFlag", default=False) + parser.add_argument("--usehead", action="store_true", dest="headFlag", default=False) + parser.add_argument("--userlmost", action="store_true", dest="rlFlag", default=False) + parser.add_argument("--userl", action="store_true", dest="rlMostFlag", default=False) + parser.add_argument("--dynet-mem", type=int, dest="cnn_mem", default=512) + parser.add_argument("--conll2017", action="store_true",dest="conll2017", default=False) + parser.add_argument("--predict", action="store_true", dest="predictFlag", default=False) + + + # parser.add_argument("--conf", metavar="FILE", dest="conf",required=True) + + args = parser.parse_args() + + if not args.predictFlag: + + if not os.path.exists(args.output): + os.mkdir(args.output) + + # config = yaml.safe_load(open(args.conf)) + + print 
"Training..." + if not (args.rlFlag or args.rlMostFlag or args.headFlag): + print 'You must use either --userlmost or --userl or --usehead (you can use multiple)' + sys.exit() + + path_tmp_file_oov = None + + print 'Preparing vocab' + words, w2i, lemmas, l2i, cpos, pos, feats, rels = utils.vocab(args.conll_train) + + + with open(os.path.join(args.output, args.params), 'w') as paramsfp: + pickle.dump((words, w2i, lemmas, l2i, cpos, pos, feats, rels, args), paramsfp) + print 'Finished collecting vocab' + + print 'Initializing blstm covington:' + parser = covington.CovingtonBILSTM(words, lemmas, cpos, pos, feats, rels, w2i, l2i, args, + path_tmp_file_oov) + + + if path_tmp_file_oov is not None: + os.unlink(path_tmp_file_oov) + + if args.conll2017: + with codecs.open(args.conll_dev) as f_conll_dev: + lookup_conll_data = utils.lookup_conll_extra_data(f_conll_dev) + + + + for epoch in xrange(args.epochs): + print 'Starting epoch', epoch + parser.Train(args.conll_train) + devpath = os.path.join(args.output, 'dev_epoch_' + str(epoch+1) + '.conll') + utils.write_conll(devpath, parser.Predict(args.conll_dev)) + + if args.conll2017: + utils.dump_lookup_extra_into_conll(devpath, lookup_conll_data) + utils.transform_to_single_root(devpath) + + + print 'Executing conll17_eval' + + + if not args.conll2017: + os.system('perl src/utils/eval.pl -g ' + args.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') + else: + os.system('python src/utils/conll17_ud_eval.py -v -w src/utils/weights.clas ' + args.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt') + + + + + parser.Save(os.path.join(args.output, args.model)) + + else: + + #TEST PHASE + with codecs.open(args.params, 'r') as paramsfp: + aux = pickle.load(paramsfp) + words, w2i, lemmas, l2i, cpos , pos, feats, rels, stored_opt = aux + + + stored_opt.external_embedding = args.external_embedding + stored_opt.pos_external_embedding = args.pos_external_embedding + stored_opt.feats_external_embedding = args.feats_external_embedding + + print "Running model with this configuration", stored_opt + + parser = covington.CovingtonBILSTM(words, lemmas, cpos, pos, feats, rels, w2i, l2i, stored_opt, + None) + + parser.Load(args.model) + + conllu = (os.path.splitext(args.conll_test.lower())[1] == '.conllu') + tespath = os.path.join(args.output, 'test_pred.conll' if not conllu else 'test_pred.conllu') + + + if args.conll2017: + with codecs.open(args.conll_test) as f_conll_test: + lookup_conll_data = utils.lookup_conll_extra_data(f_conll_test) + + + + ts = time.time() + pred = list(parser.Predict(args.conll_test)) + te = time.time() + utils.write_conll(tespath, pred) + + + if args.conll2017: + utils.dump_lookup_extra_into_conll(tespath, lookup_conll_data) + utils.transform_to_single_root(tespath) + + + if not args.conll2017: + os.system('perl src/utils/eval.pl -g ' + args.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt') + else: + os.system('python src/utils/conll17_ud_eval.py -v -w src/utils/weights.clas ' + args.conll_test + ' ' + tespath + ' > ' + tespath + '.txt') + + + + + + + diff --git a/bcovington/src/parser.pyc b/bcovington/src/parser.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e309ab49c6418551993256ceed56deaca7be950c GIT binary patch literal 6247 zcma)AU3(M96`qwXgTW+D2sQ))n*;|#A_b2il*|oBn{_^}aX#1HI|fi(a+wIkS=mWZK#wotgKXGv|8-#{O~W z!0CH)A2&qtGmY zJt4e_KD|$P`}*{L;qC9!lfs+q(+7lipidtZ-a#R!L^vk4OCta0q;$l6A-qFE9ulp` zM2Hh{5cESWv_VNp+;_&D0)`^M2N|9MQ0fDR7=QvuJkId6E%yY&Pcm>=gind^2(!+B 
zX}qsvmYI$+(=m(bX`AUdGo4_jXDp_ZHq$9)a+&E_i|IL=>3L>4%}jFzljZ6Q44+|m zeiS~-@HvKG9ED$ExWaIC6s|FRp5cW8rqC_g23`>1%R){w>nj|tR{;-lanXWbWB7F& ze#3&_9EC4g@LLRDwx!>;;CDvPzGA^w8NOyqU$@{ThL>&lh6TUN@J$*~E z$*^z3-?HF<;SC#Zh%gkPWXm@RmWOS4%YvJuaKvznrMDS=%9f5TxIGFd7Mza4nFZe) zg>%a>MR2I?vpbe_cT~D(!8;GZM|gV82stBKvzW5` zgghhUypU&wgpwJtiELMi-}w*&`TdW=FJVUb9kOLiw9d0AST;DGff1Jbbl*Ib{ukZwn{*9k`2b6vcA6N3NPFp5= zEy$J3SM`4E`F=Zy65m(*u&7j`BoSPpR#V2An!(mupqs8vUEP!}!|rAj zOHC?5xfyg~t)@U1>U_PMNqwNNikIp#mg{;qmTtQFkRJDwJm`JVNRv43eCE#Md)MtK z>GmR=TB&5Y45NlVt0wq}EP`?}mQSnRrO-|Fg?t4_g3@%5A=Tp`QG-a0mbo(e z4h`UH7B#kGSrnw)(Rreu1PV&#Nf7(8y&=OeO19KtY%dkt?r^&(J{I*GS*r9b7>V%r zP;wmDwY79z2U~6*wv|zeD;1-RbWsC5LKS8Izi1*K`M6*qq(N9)54zIjJriI0SK&q@?ILVCO{#|5G-+=TIj%|WR!Y%u-w&g< zibCuaq0NRO#5r6VPO`k<%pc?&E}2uA5|LTjXqpl+WJHx)fXyfjF|^25L;9U0(n?Vu zR4Qg`QB!OS+Tam3grdPvscdiXtm{QdnD+dQh$dnmTHuB%pm-q~Bg}N$_F~RW(FYgv z0T4e3#D%svXG3Bc~Re&ZIb2x*Z3*=r*2MwDiq7Dv`g``jrZ8 zQ>IxL^Kjn3Bo3n|+)bpeC@DiO6(SO}A6!bv( zUAjh4D%EOLts)d3ryaN5!IXtLuEdQebGK3%nC+&6<$xp?-T7SzHiCrZ+9;(ci{)7} zYIBLfi!#=w)#p*qfafOKEDGZ2bK2&7fUg)JZ*bl) zGjkT=uI1R_X7PFD^Zi`h1T-_3c4VW&+#H{Ui@xll7-(i@NQ$fSMh$qZYP4U?Vz;v8 za$mU>K(1Fkl?OvrimrmSKSF&do|m2D_ytJvsijE+G=W5ZN!0cIT+?M zS98!C*ElwS~VlZ6Dg}%+rH}Ycj~U zkPwAeBkj*qn$7wDr#0J#N5t)BovqP}it zK6&h?X_w`=oa@x{yu>>%G_Lm+BR(jW?RH?cVr-F0eh!ddbd}TL<{~C2mzX|8q#5oT zB3fyb)M-F*AmJPa>So4J*lA~)?!IgNmbiLzW&Q4LKDU$LZMj{)0Ju(rlRJOY=i>Cy zA^aYR({#I&6`SW8u%H0s7qMZnrU0&)!mK4YB!1l+`zT1NY^$XPmfmI z6t&@tsYc_;>|&^}&6n_q>csLgzb5_)jnmH5=1: + yield tokens + read += 1 + tokens = [root] + id = 0 + else: + try: + if "." in tok[0] or "-" in tok[0]: continue + tokens.append(ConllEntry(int(tok[0]), tok[1], tok[2] ,tok[3], + tok[4], tok[5], int(tok[6]) if tok[6] != '_' else -1 , tok[7])) + tokens_read+=1 + + except IndexError: + pass + + #Last sentence + if len(tokens) > 1: + yield tokens + print read, 'sentences read.' + print tokens_read ,'tokens read' + + +def write_conll(fn, conll_gen): + """ + Writes a CoNLL file + """ + with open(fn, 'w') as fh: + for sentence in conll_gen: + for entry in sentence[1:]: + fh.write('\t'.join([str(entry.id), entry.form, entry.lemma, entry.cpos, entry.pos, entry.feats, str(entry.pred_parent_id), entry.pred_relation, '_', '_'])) + fh.write('\n') + fh.write('\n') + + + +numberRegex = re.compile("[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+"); +def normalize(word): + return 'NUM' if numberRegex.match(word) else word.lower() + + + + +""" +Looks for multiword expressions in the CoNLL file and creates a lookup table that +allows to reconstruct then the output +""" +def lookup_conll_extra_data(fh): + + lookup = {} + sentence_id = 0 + lookup[sentence_id] = {} + id_insert_before = 1 + + for line in fh: + + if line.startswith('#'): continue + tok = line.strip().split('\t') + + if not tok or tok == ['']: #If it is empty line + sentence_id+=1 + id_insert_before = 1 + lookup[sentence_id] = {} + else: + if "." 
in tok[0] or "-" in tok[0]: + lookup[sentence_id][id_insert_before] = line + else: + id_insert_before+=1 + + return lookup + +""" +dumps the content of the lookup table extracted by lookup_conll_extra_data +into a output conll_path +""" +def dump_lookup_extra_into_conll(conll_path,lookup): + + sentence_id = 0 + word_id = 1 + + with codecs.open(conll_path) as f_conll: + lines = f_conll.readlines() + + #DUMPING the content of the file + f_conll = codecs.open(conll_path,"w") + + for line in lines: + + tok = line.strip().split('\t') + if tok == ['']: #If it is empty line + sentence_id+=1 + word_id = 1 + else: + if sentence_id in lookup: + if word_id in lookup[sentence_id]: + f_conll.write(lookup[sentence_id][word_id]) + word_id+=1 + f_conll.write(line) + + f_conll.close() + + +def get_rooted(conll_str): + """ + Returns a list of [id,ctag,head] of the nodes rooted to 0 + """ + rooted_elements = [] + + lines = conll_str.split('\n') + for l in lines: + ls = l.split('\t') + try: + identifier,tag,head = int(ls[UD_ID_COLUMN]),ls[UD_CTAG_COLUMN],int(ls[UD_HEAD_COLUMN]) + if head == DUMMY_ROOT: + rooted_elements.append((identifier,tag,head)) + except ValueError: + pass + return rooted_elements + + +def get_new_single_root(lmultiple_rooted): + """ + Returns the ID of the first VERB rooted to 0 or the leftmost rooted + element otherwise + """ + for e in lmultiple_rooted: + if e[2] == DUMMY_ROOT and e[1] == UD_CTAG_VERB: + return e[0] + return lmultiple_rooted[0][0] + +""" +""" +def transform_to_single_root(conll_path): + + with codecs.open(conll_path) as f_conll: + sentences = f_conll.read().split('\n\n') + + with codecs.open(conll_path,"w") as f_conll: + + i=0 + for s in sentences: + if s == "": continue + rooted = get_rooted(s) + if len(rooted) > 1: + frv = get_new_single_root(rooted) + for l in s.split('\n'): + ls = l.strip().split('\t') + + if ls != [''] and not l.startswith("#"): #If it is empty line + if ls[UD_HEAD_COLUMN] != "_" and int(ls[UD_HEAD_COLUMN]) == DUMMY_ROOT and int(ls[UD_ID_COLUMN]) != frv: + ls[UD_HEAD_COLUMN] = str(frv) + + f_conll.write('\t'.join(ls)+"\n") + else: + f_conll.write(s+"\n") + f_conll.write('\n') + i+=1 diff --git a/bcovington/src/utils.pyc b/bcovington/src/utils.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e5636000d5d171cb67f4c1b9192a20c8d3f67ff GIT binary patch literal 8418 zcmcIpU2Ggz6}~gOUhi(~ICf&ECayYFXuC=Mlai)C!K8KVGo1n=;D=gxZF5D-$Fwdc;c zckbMu^Yfi^?iBvfU;Tdc%uQG2zXI;B<4XUC!o$CzLZw=US~iryx~W1_Et~QTbU`f_ zl!vmYyrNp}QC^Q)E-A01mV2eWth`?3S5(tbURiyh)IFtcE?1RT>9kh&S`R7j5G%`l z%Ig!NUwQpP99F)m)(4b#Slz>7jwo+H+6R?)M2I2f4GM8oc|$@RQ{GV_hLv|rh!HM- zu{MmY1^>dYW`N3iQOw3$QRXKL0ce#KQM`N6ej_M>2*{Fb1jTH8JBU`ZIGT;4X0W=I zG_oL$_E^Wk-oTYwD17V!J4Ojs88SQgWC~GK_F|2rrq}RJ)xvKfxSjap*4vX-?Mr9Q zymD&Nnh96qB*@lUsnv`V%k#0kRuBbg7PwXrxryKMqpT6)N!yP+KXUI_?M9Mfg_G87 z+-h%SvOM&-jJ7R#F&($gzF2$l)VOu-g|idq&Yhi<*`)|yIBzY*iD%7wR@}6*HQ(B1 zwUG&(#qCaW*l4YI4J(d(E5xw!_R9bJ^2%1T=_m3|iVa}h{&}o0%{V$38-^F7EV&~c zX3krwFAuQYM&@HNiJMxrY^xpmxUcyQZ+>pP!+~sm?$jxH(d_|ud6~Fo$>|!3yN0@} zlmo&P0@Z>Ls1}8Qh!hlrl3b`!3o;H6zL2qHB`c%sELxgoYMey$CH2FmO`+O+&t|(# z_2I<2NWTnfYAtU0Q(ohC;7#5R!bak!Q>#HXwE_-IP=0UZZ7NONsjVys)5-Q7 znci`r1DWGIg|SkuN0}9ae=e8Ei))ZAPl3p=S*6wu8V%I&cwRSIKfbS_APtK(Zg)Yp zX6gNNv@}ch?MybsiEJ9q0d9}OISwXsoM$;J*P;x~VeZs%qDISioUAI9R_twstm^l? z*mWG6{p>?1y6Y0^QF(83F~p6O+b@+%W~Eow(bZTN59CLdy@4y`F8uvCAV^6x0D=N9 zH5&8~4X88eYbZp0F=*BZqErkoh+zLMv1OV_12zZ43nEan@xtZH6F0185c=AZ-3rFw z#r4kON>dd7srb-q=suKy22r4z!zS`7OhH>ikGhM0bSSC2_y^Ie?&2RrS>44yhziw; z1Q1XJ){hg*xryA!LHMR#y4Y4@Jf^csiXYmA! 
zQ5Ii7(N(KQ)+nx)CYo|CRiF(I;7?mKO0>8go94ms#v z5!t%PYh8r3E|OZWP)zGpDG#aDin^DhKtUbX3aAbLJu$pn(jOOv*8TS|vLkxWev*u8-P@{3$)H0hKv#A53J`3<~R2qC_!6a*#R%cK>f`w^!|aCW7y!#m?3L zq;?@;gIcf(TQ8$&K}bHkAJ(Nux_qd+6du~8Bwc=?yA&VVrB}NA zUU%tvXqU2d`KRtudT5u5bUAKtxtrJ3(h{KLt2eMWjg>%L?(-12$gShTrVxF80N&ym zHu$+`*p1YIXE}kD=@!f{M6(R6qIzrPPh9G00To4E+`F5E;-)W4z*V(IotjAwt5n;;{? z$F}a}?z#nHY}B=_#t3J{wIby69G4McM!Vi>dk6t-L78KyKL|C1XK@IAwAJzvoBLw= z8~&ZNMxM!-G8|k&wkA~uxT(bc8G{CKJb9glORuA!60G+s98LUB_Zd8jt}qypN#1-% zVnA6#6np-`aZ>!>?bvOsyot$D3ab+Yit{jsjHBjBqw_amju~Ud3GGa{E25%{EBzTn z3|N6evy?n#SCB}17}%AECLIl`cqk-%cm(u92+@~*n>VkjFw%yplHXAFh&IpzDd0v?B|m5NU4j-G zUdHM;T#@}TGuK<@9CtourpTcTHM&t!>7SV}cI(Ms)wW&=hGC=@!@ubAWeNVQ)!??z zh>($>6|b!OZe}&tq&Kh&FVlifGYC7K(mx70A^Lt=(g^$N)oT;)TwP45AyS!TjjI0U z#hGi16KoUBOE`+4!^xBwQGx3MLXwo$Nhv1+Mxc^A2ZYjqL7U{$^4O95JD|0sbf2B9 zDz#|!lPRq+i#L44z-(Bk(Ex4s6uV8cMv|r5NRMSn$QgpRrrKeUNt6--MY!oko9VGM zA%2m!=aDY&T*N*S2>=DIn`;@PzbKBJb`q~UdkYc+<{AVc^qL&j?ITa? z7&6LecPSJ~f_t6CmHrY11lSEQeoQ3T`5qnIT>U!=fUvX8Tmc^Vsk!=w008_f>=fP= z0SrPP5+ya8!a50onvoA~6GPD$k&Y=%&_Hn>o0U)dlT$o+AUul5GxQ z)7w8Zx_uxcIKh5nD0F^GzBVIqKib8t`jJfrYbU_*p^wR(gHs1lM_IG>ksn|RF>O^G zf({HCH*lpV(fZz*iC62--_^HUlb@@yGG2e)rS5fu&S zouh&R`Yl|)BI%wi3C?%LPi%kH-x0PVlXJC|hViza=p#2G0)6knvBN%!A-{_&&jn;h zG)`U=MEYA4z`{Lm2U-Vl(@<@y`6t}!T>azR$F0LTN{XCZP@5-n_XB?16Ba;s!4A|~`VJPS?yXPM36MEKeJ^2;wMXziWb~lX|wE4Y^MWV8V zXN!V|I=vR{wqNEJ1&-}kK-#aeIFCXkV^6dC6&4rRMyA;0ir`lm#~WL1kzNP(m`)Sl z0uSd+IFrhp6(8BUZ@%LA`o>`TQIMJPV{4RS!q`{?6=V_ zmg|Ob>TAb4jyC%mi|c$!?4|iqOGr2KFrB>D!#xf)8N=w_R;%sgx}voNPCeGs{~FJ` zx`HTps4xU=8OEVX6&|tAID#m+3T<&|C)AZaZ1^cEtAY)H02;m=n9>o7DbLlvL4<&K z_)LyXpc;3sOKOgk9nyqkAQX7RQN%Z-iqcjvp_WJj!0x6r3>- z|1E8f?S+g#R16u%5HgPCMql6#soZ%Om~ABpkQ#Cd&$Gez0&m>S8mr?x7_WDZ>zREZ zj6&3g1N1Dm&dA%^32H6TLMAfUGbl1fh}Y+w`8j9y>gDTK7UVDu4YSu~Et&rz9eO+u>)ee5SvkS%3NH={GEh zLd=~AW4R={$!N#GAH;naH%{4JGZ-cmBrKR>Tsl{O&d{kZ96trO zsy43@q@jYacFX}ghk3)KLBk&~g*vtfya{u|jdOhyM&8<|v8mMQi|knD*KbU3 zP7rp8M~68-zOdo}w@^~YaWgi5qL!k;yaRUHY0ux#E{X9;5278!Cs#X%cH)?5whI!G zRjW3=lBkzNDBYmEn!757FcP2F7~&K7Ok!ivc|bEws=$84wWVcWi5b8ubY4^k{6+fwsUtp%0+!iFVn;Zn!0pSe4DkobxVB4BGN5G1{s!oGZ7Dt9h#ww$gXDa>pic+nN LRZdifDt-S3NY$g~ literal 0 HcmV?d00001 diff --git a/bcovington/src/utils/conll17_ud_eval.py b/bcovington/src/utils/conll17_ud_eval.py new file mode 100644 index 0000000..c1ec200 --- /dev/null +++ b/bcovington/src/utils/conll17_ud_eval.py @@ -0,0 +1,556 @@ +#!/usr/bin/env python + +# CoNLL 2017 UD Parsing evaluation script. +# +# Compatible with Python 2.7 and 3.2+, can be used either as a module +# or a standalone executable. +# +# Copyright 2017 Institute of Formal and Applied Linguistics (UFAL), +# Faculty of Mathematics and Physics, Charles University, Czech Republic. 
+# +# Changelog: +# - [02 Jan 2017] Version 0.9: Initial release +# - [25 Jan 2017] Version 0.9.1: Fix bug in LCS alignment computation +# - [10 Mar 2017] Version 1.0: Add documentation and test +# Compare HEADs correctly using aligned words +# Allow evaluation with errorneous spaces in forms +# Compare forms in LCS case insensitively +# Detect cycles and multiple root nodes +# Compute AlignedAccuracy + +# Command line usage +# ------------------ +# conll17_ud_eval.py [-v] [-w weights_file] gold_conllu_file system_conllu_file +# +# - if no -v is given, only the CoNLL17 UD Shared Task evaluation LAS metrics +# is printed +# - if -v is given, several metrics are printed (as precision, recall, F1 score, +# and in case the metric is computed on aligned words also accuracy on these): +# - Tokens: how well do the gold tokens match system tokens +# - Sentences: how well do the gold sentences match system sentences +# - Words: how well can the gold words be aligned to system words +# - UPOS: using aligned words, how well does UPOS match +# - XPOS: using aligned words, how well does XPOS match +# - Feats: using aligned words, how well does FEATS match +# - AllTags: using aligned words, how well does UPOS+XPOS+FEATS match +# - Lemmas: using aligned words, how well does LEMMA match +# - UAS: using aligned words, how well does HEAD match +# - LAS: using aligned words, how well does HEAD+DEPREL(ignoring subtypes) match +# - if weights_file is given (with lines containing deprel-weight pairs), +# one more metric is shown: +# - WeightedLAS: as LAS, but each deprel (ignoring subtypes) has different weight + +# API usage +# --------- +# - load_conllu(file) +# - loads CoNLL-U file from given file object to an internal representation +# - the file object should return str on both Python 2 and Python 3 +# - raises UDError exception if the given file cannot be loaded +# - evaluate(gold_ud, system_ud) +# - evaluate the given gold and system CoNLL-U files (loaded with load_conllu) +# - raises UDError if the concatenated tokens of gold and system file do not match +# - returns a dictionary with the metrics described above, each metrics having +# three fields: precision, recall and f1 + +# Description of token matching +# ----------------------------- +# In order to match tokens of gold file and system file, we consider the text +# resulting from concatenation of gold tokens and text resulting from +# concatenation of system tokens. These texts should match -- if they do not, +# the evaluation fails. +# +# If the texts do match, every token is represented as a range in this original +# text, and tokens are equal only if their range is the same. + +# Description of word matching +# ---------------------------- +# When matching words of gold file and system file, we first match the tokens. +# The words which are also tokens are matched as tokens, but words in multi-word +# tokens have to be handled differently. +# +# To handle multi-word tokens, we start by finding "multi-word spans". +# Multi-word span is a span in the original text such that +# - it contains at least one multi-word token +# - all multi-word tokens in the span (considering both gold and system ones) +# are completely inside the span (i.e., they do not "stick out") +# - the multi-word span is as small as possible +# +# For every multi-word span, we align the gold and system words completely +# inside this span using LCS on their FORMs. The words not intersecting +# (even partially) any multi-word span are then aligned as tokens. 
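+#
+# Illustrative API sketch (not part of the original documentation): it assumes
+# this file is importable as `conll17_ud_eval` and uses hypothetical file paths.
+#
+#   import conll17_ud_eval as ud_eval
+#   gold_ud = ud_eval.load_conllu_file("gold.conllu")
+#   system_ud = ud_eval.load_conllu_file("system.conllu")
+#   scores = ud_eval.evaluate(gold_ud, system_ud)   # dict of Score objects
+#   print(scores["LAS"].f1)                         # e.g. the LAS F1 score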
+ + +from __future__ import division +from __future__ import print_function + +import argparse +import io +import sys +import unittest + +# CoNLL-U column names +ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10) + +# UD Error is used when raising exceptions in this module +class UDError(Exception): + pass + +# Load given CoNLL-U file into internal representation +def load_conllu(file): + # Internal representation classes + class UDRepresentation: + def __init__(self): + # Characters of all the tokens in the whole file. + # Whitespace between tokens is not included. + self.characters = [] + # List of UDSpan instances with start&end indices into `characters`. + self.tokens = [] + # List of UDWord instances. + self.words = [] + # List of UDSpan instances with start&end indices into `characters`. + self.sentences = [] + class UDSpan: + def __init__(self, start, end): + self.start = start + # Note that self.end marks the first position **after the end** of span, + # so we can use characters[start:end] or range(start, end). + self.end = end + class UDWord: + def __init__(self, span, columns, is_multiword): + # Span of this word (or MWT, see below) within ud_representation.characters. + self.span = span + # 10 columns of the CoNLL-U file: ID, FORM, LEMMA,... + self.columns = columns + # is_multiword==True means that this word is part of a multi-word token. + # In that case, self.span marks the span of the whole multi-word token. + self.is_multiword = is_multiword + # Reference to the UDWord instance representing the HEAD (or None if root). + self.parent = None + # Let's ignore language-specific deprel subtypes. + self.columns[DEPREL] = columns[DEPREL].split(':')[0] + + ud = UDRepresentation() + + # Load the CoNLL-U file + index, sentence_start = 0, None + while True: + line = file.readline() + if not line: + break + line = line.rstrip("\r\n") + + # Handle sentence start boundaries + if sentence_start is None: + # Skip comments + if line.startswith("#"): + continue + # Start a new sentence + ud.sentences.append(UDSpan(index, 0)) + sentence_start = len(ud.words) + if not line: + # Add parent UDWord links and check there are no cycles + def process_word(word): + if word.parent == "remapping": + raise UDError("There is a cycle in a sentence") + if word.parent is None: + head = int(word.columns[HEAD]) + if head > len(ud.words) - sentence_start: + raise UDError("HEAD '{}' points outside of the sentence".format(word.columns[HEAD])) + if head: + parent = ud.words[sentence_start + head - 1] + word.parent = "remapping" + process_word(parent) + word.parent = parent + + for word in ud.words[sentence_start:]: + process_word(word) + + # Check there is a single root node + if len([word for word in ud.words[sentence_start:] if word.parent is None]) != 1: + raise UDError("There are multiple roots in a sentence") + + # End the sentence + ud.sentences[-1].end = index + sentence_start = None + continue + + # Read next token/word + columns = line.split("\t") + if len(columns) != 10: + raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(line)) + + # Skip empty nodes + if "." in columns[ID]: + continue + + # Delete spaces from FORM so gold.characters == system.characters + # even if one of them tokenizes the space. 
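+        # (for illustration: a gold FORM such as "New York" and the system FORMs
+        # "New" + "York" both contribute the same character sequence "NewYork")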
+ columns[FORM] = columns[FORM].replace(" ", "") + if not columns[FORM]: + raise UDError("There is an empty FORM in the CoNLL-U file") + + # Save token + ud.characters.extend(columns[FORM]) + ud.tokens.append(UDSpan(index, index + len(columns[FORM]))) + index += len(columns[FORM]) + + # Handle multi-word tokens to save word(s) + if "-" in columns[ID]: + try: + start, end = map(int, columns[ID].split("-")) + except: + raise UDError("Cannot parse multi-word token ID '{}'".format(columns[ID])) + + for _ in range(start, end + 1): + word_line = file.readline().rstrip("\r\n") + word_columns = word_line.split("\t") + if len(word_columns) != 10: + raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(word_line)) + ud.words.append(UDWord(ud.tokens[-1], word_columns, is_multiword=True)) + # Basic tokens/words + else: + try: + word_id = int(columns[ID]) + except: + raise UDError("Cannot parse word ID '{}'".format(columns[ID])) + if word_id != len(ud.words) - sentence_start + 1: + raise UDError("Incorrect word ID '{}' for word '{}', expected '{}'".format(columns[ID], columns[FORM], len(ud.words) - sentence_start + 1)) + + try: + head_id = int(columns[HEAD]) + except: + raise UDError("Cannot parse HEAD '{}'".format(columns[HEAD])) + if head_id < 0: + raise UDError("HEAD cannot be negative") + + ud.words.append(UDWord(ud.tokens[-1], columns, is_multiword=False)) + + if sentence_start is not None: + raise UDError("The CoNLL-U file does not end with empty line") + + return ud + +# Evaluate the gold and system treebanks (loaded using load_conllu). +def evaluate(gold_ud, system_ud, deprel_weights=None): + class Score: + def __init__(self, gold_total, system_total, correct, aligned_total=None): + self.precision = correct / system_total if system_total else 0.0 + self.recall = correct / gold_total if gold_total else 0.0 + self.f1 = 2 * correct / (system_total + gold_total) if system_total + gold_total else 0.0 + self.aligned_accuracy = correct / aligned_total if aligned_total else aligned_total + class AlignmentWord: + def __init__(self, gold_word, system_word): + self.gold_word = gold_word + self.system_word = system_word + self.gold_parent = None + self.system_parent_gold_aligned = None + class Alignment: + def __init__(self, gold_words, system_words): + self.gold_words = gold_words + self.system_words = system_words + self.matched_words = [] + self.matched_words_map = {} + def append_aligned_words(self, gold_word, system_word): + self.matched_words.append(AlignmentWord(gold_word, system_word)) + self.matched_words_map[system_word] = gold_word + def fill_parents(self): + # We represent root parents in both gold and system data by '0'. + # For gold data, we represent non-root parent by corresponding gold word. + # For system data, we represent non-root parent by either gold word aligned + # to parent system nodes, or by None if no gold words is aligned to the parent. 
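+            # (illustration: if the parent of a system word was never aligned to
+            # any gold word, system_parent_gold_aligned stays None, so any
+            # HEAD-based metric scores that arc as incorrect)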
+ for words in self.matched_words: + words.gold_parent = words.gold_word.parent if words.gold_word.parent is not None else 0 + words.system_parent_gold_aligned = self.matched_words_map.get(words.system_word.parent, None) \ + if words.system_word.parent is not None else 0 + + def lower(text): + if sys.version_info < (3, 0) and isinstance(text, str): + return text.decode("utf-8").lower() + return text.lower() + + def spans_score(gold_spans, system_spans): + correct, gi, si = 0, 0, 0 + while gi < len(gold_spans) and si < len(system_spans): + if system_spans[si].start < gold_spans[gi].start: + si += 1 + elif gold_spans[gi].start < system_spans[si].start: + gi += 1 + else: + correct += gold_spans[gi].end == system_spans[si].end + si += 1 + gi += 1 + + return Score(len(gold_spans), len(system_spans), correct) + + def alignment_score(alignment, key_fn, weight_fn=lambda w: 1): + gold, system, aligned, correct = 0, 0, 0, 0 + + for word in alignment.gold_words: + gold += weight_fn(word) + + for word in alignment.system_words: + system += weight_fn(word) + + for words in alignment.matched_words: + aligned += weight_fn(words.gold_word) + + if key_fn is None: + # Return score for whole aligned words + return Score(gold, system, aligned) + + for words in alignment.matched_words: + if key_fn(words.gold_word, words.gold_parent) == key_fn(words.system_word, words.system_parent_gold_aligned): + correct += weight_fn(words.gold_word) + + return Score(gold, system, correct, aligned) + + def beyond_end(words, i, multiword_span_end): + if i >= len(words): + return True + if words[i].is_multiword: + return words[i].span.start >= multiword_span_end + return words[i].span.end > multiword_span_end + + def extend_end(word, multiword_span_end): + if word.is_multiword and word.span.end > multiword_span_end: + return word.span.end + return multiword_span_end + + def find_multiword_span(gold_words, system_words, gi, si): + # We know gold_words[gi].is_multiword or system_words[si].is_multiword. + # Find the start of the multiword span (gs, ss), so the multiword span is minimal. + # Initialize multiword_span_end characters index. + if gold_words[gi].is_multiword: + multiword_span_end = gold_words[gi].span.end + if not system_words[si].is_multiword and system_words[si].span.start < gold_words[gi].span.start: + si += 1 + else: # if system_words[si].is_multiword + multiword_span_end = system_words[si].span.end + if not gold_words[gi].is_multiword and gold_words[gi].span.start < system_words[si].span.start: + gi += 1 + gs, ss = gi, si + + # Find the end of the multiword span + # (so both gi and si are pointing to the word following the multiword span end). 
+ while not beyond_end(gold_words, gi, multiword_span_end) or \ + not beyond_end(system_words, si, multiword_span_end): + if gi < len(gold_words) and (si >= len(system_words) or + gold_words[gi].span.start <= system_words[si].span.start): + multiword_span_end = extend_end(gold_words[gi], multiword_span_end) + gi += 1 + else: + multiword_span_end = extend_end(system_words[si], multiword_span_end) + si += 1 + return gs, ss, gi, si + + def compute_lcs(gold_words, system_words, gi, si, gs, ss): + lcs = [[0] * (si - ss) for i in range(gi - gs)] + for g in reversed(range(gi - gs)): + for s in reversed(range(si - ss)): + if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]): + lcs[g][s] = 1 + (lcs[g+1][s+1] if g+1 < gi-gs and s+1 < si-ss else 0) + lcs[g][s] = max(lcs[g][s], lcs[g+1][s] if g+1 < gi-gs else 0) + lcs[g][s] = max(lcs[g][s], lcs[g][s+1] if s+1 < si-ss else 0) + return lcs + + def align_words(gold_words, system_words): + alignment = Alignment(gold_words, system_words) + + gi, si = 0, 0 + while gi < len(gold_words) and si < len(system_words): + if gold_words[gi].is_multiword or system_words[si].is_multiword: + # A: Multi-word tokens => align via LCS within the whole "multiword span". + gs, ss, gi, si = find_multiword_span(gold_words, system_words, gi, si) + + if si > ss and gi > gs: + lcs = compute_lcs(gold_words, system_words, gi, si, gs, ss) + + # Store aligned words + s, g = 0, 0 + while g < gi - gs and s < si - ss: + if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]): + alignment.append_aligned_words(gold_words[gs+g], system_words[ss+s]) + g += 1 + s += 1 + elif lcs[g][s] == (lcs[g+1][s] if g+1 < gi-gs else 0): + g += 1 + else: + s += 1 + else: + # B: No multi-word token => align according to spans. 
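+                # (for illustration: a gold word covering characters 3..7 aligns
+                # only to a system word covering exactly 3..7; otherwise the word
+                # that starts earlier, the gold word on ties, is skipped)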
+ if (gold_words[gi].span.start, gold_words[gi].span.end) == (system_words[si].span.start, system_words[si].span.end): + alignment.append_aligned_words(gold_words[gi], system_words[si]) + gi += 1 + si += 1 + elif gold_words[gi].span.start <= system_words[si].span.start: + gi += 1 + else: + si += 1 + + alignment.fill_parents() + + return alignment + + # Check that underlying character sequences do match + if gold_ud.characters != system_ud.characters: + index = 0 + while gold_ud.characters[index] == system_ud.characters[index]: + index += 1 + + raise UDError( + "The concatenation of tokens in gold file and in system file differ!\n" + + "First 20 differing characters in gold file: '{}' and system file: '{}'".format( + "".join(gold_ud.characters[index:index + 20]), + "".join(system_ud.characters[index:index + 20]) + ) + ) + + # Align words + alignment = align_words(gold_ud.words, system_ud.words) + + # Compute the F1-scores + result = { + "Tokens": spans_score(gold_ud.tokens, system_ud.tokens), + "Sentences": spans_score(gold_ud.sentences, system_ud.sentences), + "Words": alignment_score(alignment, None), + "UPOS": alignment_score(alignment, lambda w, parent: w.columns[UPOS]), + "XPOS": alignment_score(alignment, lambda w, parent: w.columns[XPOS]), + "Feats": alignment_score(alignment, lambda w, parent: w.columns[FEATS]), + "AllTags": alignment_score(alignment, lambda w, parent: (w.columns[UPOS], w.columns[XPOS], w.columns[FEATS])), + "Lemmas": alignment_score(alignment, lambda w, parent: w.columns[LEMMA]), + "UAS": alignment_score(alignment, lambda w, parent: parent), + "LAS": alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL])), + } + + # Add WeightedLAS if weights are given + if deprel_weights is not None: + def weighted_las(word): + return deprel_weights.get(word.columns[DEPREL], 1.0) + result["WeightedLAS"] = alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL]), weighted_las) + + return result + +def load_deprel_weights(weights_file): + if weights_file is None: + return None + + deprel_weights = {} + for line in weights_file: + # Ignore comments and empty lines + if line.startswith("#") or not line.strip(): + continue + + columns = line.rstrip("\r\n").split() + if len(columns) != 2: + raise ValueError("Expected two columns in the UD Relations weights file on line '{}'".format(line)) + + deprel_weights[columns[0]] = float(columns[1]) + + return deprel_weights + +def load_conllu_file(path): + _file = open(path, mode="r", **({"encoding": "utf-8"} if sys.version_info >= (3, 0) else {})) + return load_conllu(_file) + +def evaluate_wrapper(args): + # Load CoNLL-U files + gold_ud = load_conllu_file(args.gold_file) + system_ud = load_conllu_file(args.system_file) + + # Load weights if requested + deprel_weights = load_deprel_weights(args.weights) + + return evaluate(gold_ud, system_ud, deprel_weights) + +def main(): + # Parse arguments + parser = argparse.ArgumentParser() + parser.add_argument("gold_file", type=str, + help="Name of the CoNLL-U file with the gold data.") + parser.add_argument("system_file", type=str, + help="Name of the CoNLL-U file with the predicted data.") + parser.add_argument("--weights", "-w", type=argparse.FileType("r"), default=None, + metavar="deprel_weights_file", + help="Compute WeightedLAS using given weights for Universal Dependency Relations.") + parser.add_argument("--verbose", "-v", default=0, action="count", + help="Print all metrics.") + args = parser.parse_args() + + # Use verbose if weights are supplied + if 
args.weights is not None and not args.verbose: + args.verbose = 1 + + # Evaluate + evaluation = evaluate_wrapper(args) + + # Print the evaluation + if not args.verbose: + print("LAS F1 Score: {:.2f}".format(100 * evaluation["LAS"].f1)) + else: + metrics = ["Tokens", "Sentences", "Words", "UPOS", "XPOS", "Feats", "AllTags", "Lemmas", "UAS", "LAS"] + if args.weights is not None: + metrics.append("WeightedLAS") + + print("Metrics | Precision | Recall | F1 Score | AligndAcc") + print("-----------+-----------+-----------+-----------+-----------") + for metric in metrics: + print("{:11}|{:10.2f} |{:10.2f} |{:10.2f} |{}".format( + metric, + 100 * evaluation[metric].precision, + 100 * evaluation[metric].recall, + 100 * evaluation[metric].f1, + "{:10.2f}".format(100 * evaluation[metric].aligned_accuracy) if evaluation[metric].aligned_accuracy is not None else "" + )) + +if __name__ == "__main__": + main() + +# Tests, which can be executed with `python -m unittest conll17_ud_eval`. +class TestAlignment(unittest.TestCase): + @staticmethod + def _load_words(words): + """Prepare fake CoNLL-U files with fake HEAD to prevent multiple roots errors.""" + lines, num_words = [], 0 + for w in words: + parts = w.split(" ") + if len(parts) == 1: + num_words += 1 + lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, parts[0], int(num_words>1))) + else: + lines.append("{}-{}\t{}\t_\t_\t_\t_\t_\t_\t_\t_".format(num_words + 1, num_words + len(parts) - 1, parts[0])) + for part in parts[1:]: + num_words += 1 + lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, part, int(num_words>1))) + return load_conllu((io.StringIO if sys.version_info >= (3, 0) else io.BytesIO)("\n".join(lines+["\n"]))) + + def _test_exception(self, gold, system): + self.assertRaises(UDError, evaluate, self._load_words(gold), self._load_words(system)) + + def _test_ok(self, gold, system, correct): + metrics = evaluate(self._load_words(gold), self._load_words(system)) + gold_words = sum((max(1, len(word.split(" ")) - 1) for word in gold)) + system_words = sum((max(1, len(word.split(" ")) - 1) for word in system)) + self.assertEqual((metrics["Words"].precision, metrics["Words"].recall, metrics["Words"].f1), + (correct / system_words, correct / gold_words, 2 * correct / (gold_words + system_words))) + + def test_exception(self): + self._test_exception(["a"], ["b"]) + + def test_equal(self): + self._test_ok(["a"], ["a"], 1) + self._test_ok(["a", "b", "c"], ["a", "b", "c"], 3) + + def test_equal_with_multiword(self): + self._test_ok(["abc a b c"], ["a", "b", "c"], 3) + self._test_ok(["a", "bc b c", "d"], ["a", "b", "c", "d"], 4) + self._test_ok(["abcd a b c d"], ["ab a b", "cd c d"], 4) + self._test_ok(["abc a b c", "de d e"], ["a", "bcd b c d", "e"], 5) + + def test_alignment(self): + self._test_ok(["abcd"], ["a", "b", "c", "d"], 0) + self._test_ok(["abc", "d"], ["a", "b", "c", "d"], 1) + self._test_ok(["a", "bc", "d"], ["a", "b", "c", "d"], 2) + self._test_ok(["a", "bc b c", "d"], ["a", "b", "cd"], 2) + self._test_ok(["abc a BX c", "def d EX f"], ["ab a b", "cd c d", "ef e f"], 4) + self._test_ok(["ab a b", "cd bc d"], ["a", "bc", "d"], 2) + self._test_ok(["a", "bc b c", "d"], ["ab AX BX", "cd CX a"], 1) diff --git a/bcovington/src/utils/eval.pl b/bcovington/src/utils/eval.pl new file mode 100644 index 0000000..f8dbbfd --- /dev/null +++ b/bcovington/src/utils/eval.pl @@ -0,0 +1,1827 @@ +#!/usr/bin/env perl + +# Author: Yuval Krymolowski +# Addition of precision and recall +# and of frame confusion list: Sabine Buchholz +# 
Addition of DEPREL + ATTACHMENT: +# Prokopis Prokopidis (prokopis at ilsp dot gr) +# Acknowledgements: +# to Markus Kuhn for suggesting the use of +# the Unicode category property + +if ($] < 5.008001) +{ + printf STDERR < -s + + This script evaluates a system output with respect to a gold standard. + Both files should be in UTF-8 encoded CoNLL-X tabular format. + + Punctuation tokens (those where all characters have the Unicode + category property "Punctuation") are ignored for scoring (unless the + -p flag is used). + + The output breaks down the errors according to their type and context. + + Optional parameters: + -o FILE : output: print output to FILE (default is standard output) + -q : quiet: only print overall performance, without the details + -b : evalb: produce output in a format similar to evalb + (http://nlp.cs.nyu.edu/evalb/); use together with -q + -p : punctuation: also score on punctuation (default is not to score on it) + -v : version: show the version number + -h : help: print this help text and exit + +EOT +; + +my ($line_num) ; +my ($sep) = '0x01' ; + +my ($START) = '.S' ; +my ($END) = '.E' ; + +my ($con_err_num) = 3 ; +my ($freq_err_num) = 10 ; +my ($spec_err_loc_con) = 8 ; + +################################################################################ +### subfunctions ### +################################################################################ + +# Whether a string consists entirely of characters with the Unicode +# category property "Punctuation" (see "man perlunicode") +sub is_uni_punct +{ + my ($word) = @_ ; + + return scalar(Encode::decode_utf8($word)=~ /^\p{Punctuation}+$/) ; +} + +# The length of a unicode string, excluding non-spacing marks +# (for example vowel marks in Arabic) + +sub uni_len +{ + my ($word) = @_ ; + my ($ch, $l) ; + + $l = 0 ; + foreach $ch (split(//, Encode::decode_utf8($word))) + { + if ($ch !~ /^\p{NonspacingMark}/) + { + $l++ ; + } + } + + return $l ; +} + +sub filter_context_counts +{ # filter_context_counts + + my ($vec, $num, $max_len) = @_ ; + my ($con, $l, $thresh) ; + + $thresh = (sort {$b <=> $a} values %{$vec})[$num-1] ; + + foreach $con (keys %{$vec}) + { + if (${$vec}{$con} < $thresh) + { + delete ${$vec}{$con} ; + next ; + } + + $l = uni_len($con) ; + + if ($l > ${$max_len}) + { + ${$max_len} = $l ; + } + } + +} # filter_context_counts + +sub print_context +{ # print_context + + my ($counts, $counts_pos, $max_con_len, $max_con_pos_len) = @_ ; + my (@v_con, @v_con_pos, $con, $con_pos, $i, $n) ; + + printf OUT " %-*s | %-4s | %-4s | %-4s | %-4s", $max_con_pos_len, 'CPOS', 'any', 'head', 'dep', 'both' ; + printf OUT " ||" ; + printf OUT " %-*s | %-4s | %-4s | %-4s | %-4s", $max_con_len, 'word', 'any', 'head', 'dep', 'both' ; + printf OUT "\n" ; + printf OUT " %s-+------+------+------+-----", '-' x $max_con_pos_len; + printf OUT "--++" ; + printf OUT "--%s-+------+------+------+-----", '-' x $max_con_len; + printf OUT "\n" ; + + @v_con = sort {${$counts}{tot}{$b} <=> ${$counts}{tot}{$a}} keys %{${$counts}{tot}} ; + @v_con_pos = sort {${$counts_pos}{tot}{$b} <=> ${$counts_pos}{tot}{$a}} keys %{${$counts_pos}{tot}} ; + + $n = scalar @v_con ; + if (scalar @v_con_pos > $n) + { + $n = scalar @v_con_pos ; + } + + foreach $i (0 .. 
$n-1) + { + if (defined $v_con_pos[$i]) + { + $con_pos = $v_con_pos[$i] ; + printf OUT " %-*s | %4d | %4d | %4d | %4d", + $max_con_pos_len, $con_pos, ${$counts_pos}{tot}{$con_pos}, + ${$counts_pos}{err_head}{$con_pos}, ${$counts_pos}{err_dep}{$con_pos}, + ${$counts_pos}{err_dep}{$con_pos}+${$counts_pos}{err_head}{$con_pos}-${$counts_pos}{tot}{$con_pos} ; + } + else + { + printf OUT " %-*s | %4s | %4s | %4s | %4s", + $max_con_pos_len, ' ', ' ', ' ', ' ', ' ' ; + } + + printf OUT " ||" ; + + if (defined $v_con[$i]) + { + $con = $v_con[$i] ; + printf OUT " %-*s | %4d | %4d | %4d | %4d", + $max_con_len+length($con)-uni_len($con), $con, ${$counts}{tot}{$con}, + ${$counts}{err_head}{$con}, ${$counts}{err_dep}{$con}, + ${$counts}{err_dep}{$con}+${$counts}{err_head}{$con}-${$counts}{tot}{$con} ; + } + else + { + printf OUT " %-*s | %4s | %4s | %4s | %4s", + $max_con_len, ' ', ' ', ' ', ' ', ' ' ; + } + + printf OUT "\n" ; + } + + printf OUT " %s-+------+------+------+-----", '-' x $max_con_pos_len; + printf OUT "--++" ; + printf OUT "--%s-+------+------+------+-----", '-' x $max_con_len; + printf OUT "\n" ; + + printf OUT "\n\n" ; + +} # print_context + +sub num_as_word +{ + my ($num) = @_ ; + + $num = abs($num) ; + + if ($num == 1) + { + return ('one word') ; + } + elsif ($num == 2) + { + return ('two words') ; + } + elsif ($num == 3) + { + return ('three words') ; + } + elsif ($num == 4) + { + return ('four words') ; + } + else + { + return ($num.' words') ; + } +} + +sub describe_err +{ # describe_err + + my ($head_err, $head_aft_bef, $dep_err) = @_ ; + my ($dep_g, $dep_s, $desc) ; + my ($head_aft_bef_g, $head_aft_bef_s) = split(//, $head_aft_bef) ; + + if ($head_err eq '-') + { + $desc = 'correct head' ; + + if ($head_aft_bef_s eq '0') + { + $desc .= ' (0)' ; + } + elsif ($head_aft_bef_s eq 'e') + { + $desc .= ' (the focus word)' ; + } + elsif ($head_aft_bef_s eq 'a') + { + $desc .= ' (after the focus word)' ; + } + elsif ($head_aft_bef_s eq 'b') + { + $desc .= ' (before the focus word)' ; + } + } + elsif ($head_aft_bef_s eq '0') + { + $desc = 'head = 0 instead of ' ; + if ($head_aft_bef_g eq 'a') + { + $desc.= 'after ' ; + } + if ($head_aft_bef_g eq 'b') + { + $desc.= 'before ' ; + } + $desc .= 'the focus word' ; + } + elsif ($head_aft_bef_g eq '0') + { + $desc = 'head is ' ; + if ($head_aft_bef_g eq 'a') + { + $desc.= 'after ' ; + } + if ($head_aft_bef_g eq 'b') + { + $desc.= 'before ' ; + } + $desc .= 'the focus word instead of 0' ; + } + else + { + $desc = num_as_word($head_err) ; + if ($head_err < 0) + { + $desc .= ' before' ; + } + else + { + $desc .= ' after' ; + } + + $desc = 'head '.$desc.' 
the correct head ' ; + + if ($head_aft_bef_s eq '0') + { + $desc .= '(0' ; + } + elsif ($head_aft_bef_s eq 'e') + { + $desc .= '(the focus word' ; + } + elsif ($head_aft_bef_s eq 'a') + { + $desc .= '(after the focus word' ; + } + elsif ($head_aft_bef_s eq 'b') + { + $desc .= '(before the focus word' ; + } + + if ($head_aft_bef_g ne $head_aft_bef_s) + { + $desc .= ' instead of' ; + if ($head_aft_bef_s eq '0') + { + $desc .= '0' ; + } + elsif ($head_aft_bef_s eq 'e') + { + $desc .= 'the focus word' ; + } + elsif ($head_aft_bef_s eq 'a') + { + $desc .= 'after the focus word' ; + } + elsif ($head_aft_bef_s eq 'b') + { + $desc .= 'before the focus word' ; + } + } + + $desc .= ')' ; + } + + $desc .= ', ' ; + + if ($dep_err eq '-') + { + $desc .= 'correct dependency' ; + } + else + { + ($dep_g, $dep_s) = ($dep_err =~ /^(.*)->(.*)$/) ; + $desc .= sprintf('dependency "%s" instead of "%s"', $dep_s, $dep_g) ; + } + + return($desc) ; + +} # describe_err + +sub get_context +{ # get_context + + my ($sent, $i_w) = @_ ; + my ($w_2, $w_1, $w1, $w2) ; + my ($p_2, $p_1, $p1, $p2) ; + + if ($i_w >= 2) + { + $w_2 = ${${$sent}[$i_w-2]}{word} ; + $p_2 = ${${$sent}[$i_w-2]}{pos} ; + } + else + { + $w_2 = $START ; + $p_2 = $START ; + } + + if ($i_w >= 1) + { + $w_1 = ${${$sent}[$i_w-1]}{word} ; + $p_1 = ${${$sent}[$i_w-1]}{pos} ; + } + else + { + $w_1 = $START ; + $p_1 = $START ; + } + + if ($i_w <= scalar @{$sent}-2) + { + $w1 = ${${$sent}[$i_w+1]}{word} ; + $p1 = ${${$sent}[$i_w+1]}{pos} ; + } + else + { + $w1 = $END ; + $p1 = $END ; + } + + if ($i_w <= scalar @{$sent}-3) + { + $w2 = ${${$sent}[$i_w+2]}{word} ; + $p2 = ${${$sent}[$i_w+2]}{pos} ; + } + else + { + $w2 = $END ; + $p2 = $END ; + } + + return ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) ; + +} # get_context + +sub read_sent +{ # read_sent + + my ($sent_gold, $sent_sys) = @_ ; + my ($line_g, $line_s, $new_sent) ; + my (%fields_g, %fields_s) ; + + $new_sent = 1 ; + + @{$sent_gold} = () ; + @{$sent_sys} = () ; + + while (1) + { # main reading loop + + $line_g = ; + $line_s = ; + + $line_num++ ; + + # system output has fewer lines than gold standard + if ((defined $line_g) && (! defined $line_s)) + { + printf STDERR "line mismatch, line %d:\n", $line_num ; + printf STDERR " gold: %s", $line_g ; + printf STDERR " sys : past end of file\n" ; + exit(1) ; + } + + # system output has more lines than gold standard + if ((! defined $line_g) && (defined $line_s)) + { + printf STDERR "line mismatch, line %d:\n", $line_num ; + printf STDERR " gold: past end of file\n" ; + printf STDERR " sys : %s", $line_s ; + exit(1) ; + } + + # end of file reached for both + if ((! defined $line_g) && (! 
defined $line_s)) + { + return (1) ; + } + + # one contains end of sentence but other one does not + if (($line_g =~ /^\s+$/) != ($line_s =~ /^\s+$/)) + { + printf STDERR "line mismatch, line %d:\n", $line_num ; + printf STDERR " gold: %s", $line_g ; + printf STDERR " sys : %s", $line_s ; + exit(1) ; + } + + # end of sentence reached + if ($line_g =~ /^\s+$/) + { + return(0) ; + } + + # now both lines contain information + + if ($new_sent) + { + $new_sent = 0 ; + } + + # 'official' column names + # options.output = ['id','form','lemma','cpostag','postag', + # 'feats','head','deprel','phead','pdeprel'] + + @fields_g{'word', 'pos', 'head', 'dep'} = (split (/\s+/, $line_g))[1, 3, 6, 7] ; + + push @{$sent_gold}, { %fields_g } ; + + @fields_s{'word', 'pos', 'head', 'dep'} = (split (/\s+/, $line_s))[1, 3, 6, 7] ; + + if (($fields_g{word} ne $fields_s{word}) + || + ($fields_g{pos} ne $fields_s{pos})) + { + printf STDERR "Word/pos mismatch, line %d:\n", $line_num ; + printf STDERR " gold: %s", $line_g ; + printf STDERR " sys : %s", $line_s ; + exit(1) ; + } + + push @{$sent_sys}, { %fields_s } ; + + } # main reading loop + +} # read_sent + +################################################################################ +### main ### +################################################################################ + +our ($opt_g, $opt_s, $opt_o, $opt_h, $opt_v, $opt_q, $opt_p, $opt_b) ; + +my ($sent_num, $eof, $word_num, @err_sent) ; +my (@sent_gold, @sent_sys, @starts) ; +my ($word, $pos, $wp, $head_g, $dep_g, $head_s, $dep_s) ; +my (%counts, $err_head, $err_dep, $con, $con1, $con_pos, $con_pos1, $thresh) ; +my ($head_err, $dep_err, @cur_err, %err_counts, $err_counter, $err_desc) ; +my ($loc_con, %loc_con_err_counts, %err_desc) ; +my ($head_aft_bef_g, $head_aft_bef_s, $head_aft_bef) ; +my ($con_bef, $con_aft, $con_bef_2, $con_aft_2, @bits, @e_bits, @v_con, @v_con_pos) ; +my ($con_pos_bef, $con_pos_aft, $con_pos_bef_2, $con_pos_aft_2) ; +my ($max_word_len, $max_pos_len, $max_con_len, $max_con_pos_len) ; +my ($max_word_spec_len, $max_con_bef_len, $max_con_aft_len) ; +my (%freq_err, $err) ; + +my ($i, $j, $i_w, $l, $n_args) ; +my ($w_2, $w_1, $w1, $w2) ; +my ($wp_2, $wp_1, $wp1, $wp2) ; +my ($p_2, $p_1, $p1, $p2) ; + +my ($short_output) ; +my ($score_on_punct) ; +$counts{punct} = 0; # initialize + +getopts("g:o:s:qvhpb") ; + +if (defined $opt_v) +{ + my $id = '$Id: eval.pl,v 1.9 2006/05/09 20:30:01 yuval Exp $'; + my @parts = split ' ',$id; + print "Version $parts[2]\n"; + exit(0); +} + +if ((defined $opt_h) || ((! defined $opt_g) && (! defined $opt_s))) +{ + die $usage ; +} + +if (! defined $opt_g) +{ + die "Gold standard file (-g) missing\n" ; +} + +if (! defined $opt_s) +{ + die "System output file (-s) missing\n" ; +} + +if (! 
defined $opt_o) +{ + $opt_o = '-' ; +} + +if (defined $opt_q) +{ + $short_output = 1 ; +} else { + $short_output = 0 ; +} + +if (defined $opt_p) +{ + $score_on_punct = 1 ; +} else { + $score_on_punct = 0 ; +} + +$line_num = 0 ; +$sent_num = 0 ; +$eof = 0 ; + +@err_sent = () ; +@starts = () ; + +%{$err_sent[0]} = () ; + +$max_pos_len = length('CPOS') ; + +################################################################################ +### reading input ### +################################################################################ + +open (GOLD, "<$opt_g") || die "Could not open gold standard file $opt_g\n" ; +open (SYS, "<$opt_s") || die "Could not open system output file $opt_s\n" ; +open (OUT, ">$opt_o") || die "Could not open output file $opt_o\n" ; + + +if (defined $opt_b) { # produce output similar to evalb + print OUT " Sent. Attachment Correct Scoring \n"; + print OUT " ID Tokens - Unlab. Lab. HEAD HEAD+DEPREL tokens - - - -\n"; + print OUT " ============================================================================\n"; +} + + +while (! $eof) +{ # main reading loop + + $starts[$sent_num] = $line_num+1 ; + $eof = read_sent(\@sent_gold, \@sent_sys) ; + + $sent_num++ ; + + %{$err_sent[$sent_num]} = () ; + $word_num = scalar @sent_gold ; + + # for accuracy per sentence + my %sent_counts = ( tot => 0, + err_any => 0, + err_head => 0 + ); + + # printf "$sent_num $word_num\n" ; + + my @frames_g = ('** '); # the initial frame for the virtual root + my @frames_s = ('** '); # the initial frame for the virtual root + foreach $i_w (0 .. $word_num-1) + { # loop on words + push @frames_g, ''; # initialize + push @frames_s, ''; # initialize + } + + foreach $i_w (0 .. $word_num-1) + { # loop on words + + ($word, $pos, $head_g, $dep_g) + = @{$sent_gold[$i_w]}{'word', 'pos', 'head', 'dep'} ; + $wp = $word.' / '.$pos ; + + # printf "%d: %s %s %s %s\n", $i_w, $word, $pos, $head_g, $dep_g ; + + if ((! $score_on_punct) && is_uni_punct($word)) + { + $counts{punct}++ ; + # ignore punctuations + next ; + } + + if (length($pos) > $max_pos_len) + { + $max_pos_len = length($pos) ; + } + + ($head_s, $dep_s) = @{$sent_sys[$i_w]}{'head', 'dep'} ; + + $counts{tot}++ ; + $counts{word}{$wp}{tot}++ ; + $counts{pos}{$pos}{tot}++ ; + $counts{head}{$head_g-$i_w-1}{tot}++ ; + + # for frame confusions + # add child to frame of parent + $frames_g[$head_g] .= "$dep_g "; + $frames_s[$head_s] .= "$dep_s "; + # add to frame of token itself + $frames_g[$i_w+1] .= "*$dep_g* "; # $i_w+1 because $i_w starts counting at zero + $frames_s[$i_w+1] .= "*$dep_g* "; + + # for precision and recall of DEPREL + $counts{dep}{$dep_g}{tot}++ ; # counts for gold standard deprels + $counts{dep2}{$dep_g}{$dep_s}++ ; # counts for confusions + $counts{dep_s}{$dep_s}{tot}++ ; # counts for system deprels + $counts{all_dep}{$dep_g} = 1 ; # list of all deprels that occur ... + $counts{all_dep}{$dep_s} = 1 ; # ... 
in either gold or system output + + # for precision and recall of HEAD direction + my $dir_g; + if ($head_g == 0) { + $dir_g = 'to_root'; + } elsif ($head_g < $i_w+1) { # $i_w+1 because $i_w starts counting at zero + # also below + $dir_g = 'left'; + } elsif ($head_g > $i_w+1) { + $dir_g = 'right'; + } else { + # token links to itself; should never happen in correct gold standard + $dir_g = 'self'; + } + my $dir_s; + if ($head_s == 0) { + $dir_s = 'to_root'; + } elsif ($head_s < $i_w+1) { + $dir_s = 'left'; + } elsif ($head_s > $i_w+1) { + $dir_s = 'right'; + } else { + # token links to itself; should not happen in good system + # (but not forbidden in shared task) + $dir_s = 'self'; + } + $counts{dir_g}{$dir_g}{tot}++ ; # counts for gold standard head direction + $counts{dir2}{$dir_g}{$dir_s}++ ; # counts for confusions + $counts{dir_s}{$dir_s}{tot}++ ; # counts for system head direction + + # for precision and recall of HEAD distance + my $dist_g; + if ($head_g == 0) { + $dist_g = 'to_root'; + } elsif ( abs($head_g - ($i_w+1)) <= 1 ) { + $dist_g = '1'; # includes the 'self' cases + } elsif ( abs($head_g - ($i_w+1)) <= 2 ) { + $dist_g = '2'; + } elsif ( abs($head_g - ($i_w+1)) <= 6 ) { + $dist_g = '3-6'; + } else { + $dist_g = '7-...'; + } + my $dist_s; + if ($head_s == 0) { + $dist_s = 'to_root'; + } elsif ( abs($head_s - ($i_w+1)) <= 1 ) { + $dist_s = '1'; # includes the 'self' cases + } elsif ( abs($head_s - ($i_w+1)) <= 2 ) { + $dist_s = '2'; + } elsif ( abs($head_s - ($i_w+1)) <= 6 ) { + $dist_s = '3-6'; + } else { + $dist_s = '7-...'; + } + $counts{dist_g}{$dist_g}{tot}++ ; # counts for gold standard head distance + $counts{dist2}{$dist_g}{$dist_s}++ ; # counts for confusions + $counts{dist_s}{$dist_s}{tot}++ ; # counts for system head distance + + + $err_head = ($head_g ne $head_s) ; # error in head + $err_dep = ($dep_g ne $dep_s) ; # error in deprel + + $head_err = '-' ; + $dep_err = '-' ; + + # for accuracy per sentence + $sent_counts{tot}++ ; + if ($err_dep || $err_head) { + $sent_counts{err_any}++ ; + } + if ($err_head) { + $sent_counts{err_head}++ ; + } + + # total counts and counts for CPOS involved in errors + + if ($head_g eq '0') + { + $head_aft_bef_g = '0' ; + } + elsif ($head_g eq $i_w+1) + { + $head_aft_bef_g = 'e' ; + } + else + { + $head_aft_bef_g = ($head_g <= $i_w+1 ? 'b' : 'a') ; + } + + if ($head_s eq '0') + { + $head_aft_bef_s = '0' ; + } + elsif ($head_s eq $i_w+1) + { + $head_aft_bef_s = 'e' ; + } + else + { + $head_aft_bef_s = ($head_s <= $i_w+1 ? 'b' : 'a') ; + } + + $head_aft_bef = $head_aft_bef_g.$head_aft_bef_s ; + + if ($err_head) + { + if ($head_aft_bef_s eq '0') + { + $head_err = 0 ; + } + else + { + $head_err = $head_s-$head_g ; + } + + $err_sent[$sent_num]{head}++ ; + $counts{err_head}{tot}++ ; + $counts{err_head}{$head_err}++ ; + + $counts{word}{err_head}{$wp}++ ; + $counts{pos}{$pos}{err_head}{tot}++ ; + $counts{pos}{$pos}{err_head}{$head_err}++ ; + } + + if ($err_dep) + { + $dep_err = $dep_g.'->'.$dep_s ; + $err_sent[$sent_num]{dep}++ ; + $counts{err_dep}{tot}++ ; + $counts{err_dep}{$dep_err}++ ; + + $counts{word}{err_dep}{$wp}++ ; + $counts{pos}{$pos}{err_dep}{tot}++ ; + $counts{pos}{$pos}{err_dep}{$dep_err}++ ; + + if ($err_head) + { + $counts{err_both}++ ; + $counts{pos}{$pos}{err_both}++ ; + } + } + + ### DEPREL + ATTACHMENT + if ((!$err_dep) && ($err_head)) { + $counts{err_head_corr_dep}{tot}++ ; + $counts{err_head_corr_dep}{$dep_s}++ ; + } + ### DEPREL + ATTACHMENT + + # counts for words involved in errors + + if (! 
($err_head || $err_dep)) + { + next ; + } + + $err_sent[$sent_num]{word}++ ; + $counts{err_any}++ ; + $counts{word}{err_any}{$wp}++ ; + $counts{pos}{$pos}{err_any}++ ; + + ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) = get_context(\@sent_gold, $i_w) ; + + if ($w_2 ne $START) + { + $wp_2 = $w_2.' / '.$p_2 ; + } + else + { + $wp_2 = $w_2 ; + } + + if ($w_1 ne $START) + { + $wp_1 = $w_1.' / '.$p_1 ; + } + else + { + $wp_1 = $w_1 ; + } + + if ($w1 ne $END) + { + $wp1 = $w1.' / '.$p1 ; + } + else + { + $wp1 = $w1 ; + } + + if ($w2 ne $END) + { + $wp2 = $w2.' / '.$p2 ; + } + else + { + $wp2 = $w2 ; + } + + $con_bef = $wp_1 ; + $con_bef_2 = $wp_2.' + '.$wp_1 ; + $con_aft = $wp1 ; + $con_aft_2 = $wp1.' + '.$wp2 ; + + $con_pos_bef = $p_1 ; + $con_pos_bef_2 = $p_2.'+'.$p_1 ; + $con_pos_aft = $p1 ; + $con_pos_aft_2 = $p1.'+'.$p2 ; + + if ($w_1 ne $START) + { + # do not count '.S' as a word context + $counts{con_bef_2}{tot}{$con_bef_2}++ ; + $counts{con_bef_2}{err_head}{$con_bef_2} += $err_head ; + $counts{con_bef_2}{err_dep}{$con_bef_2} += $err_dep ; + $counts{con_bef}{tot}{$con_bef}++ ; + $counts{con_bef}{err_head}{$con_bef} += $err_head ; + $counts{con_bef}{err_dep}{$con_bef} += $err_dep ; + } + + if ($w1 ne $END) + { + # do not count '.E' as a word context + $counts{con_aft_2}{tot}{$con_aft_2}++ ; + $counts{con_aft_2}{err_head}{$con_aft_2} += $err_head ; + $counts{con_aft_2}{err_dep}{$con_aft_2} += $err_dep ; + $counts{con_aft}{tot}{$con_aft}++ ; + $counts{con_aft}{err_head}{$con_aft} += $err_head ; + $counts{con_aft}{err_dep}{$con_aft} += $err_dep ; + } + + $counts{con_pos_bef_2}{tot}{$con_pos_bef_2}++ ; + $counts{con_pos_bef_2}{err_head}{$con_pos_bef_2} += $err_head ; + $counts{con_pos_bef_2}{err_dep}{$con_pos_bef_2} += $err_dep ; + $counts{con_pos_bef}{tot}{$con_pos_bef}++ ; + $counts{con_pos_bef}{err_head}{$con_pos_bef} += $err_head ; + $counts{con_pos_bef}{err_dep}{$con_pos_bef} += $err_dep ; + + $counts{con_pos_aft_2}{tot}{$con_pos_aft_2}++ ; + $counts{con_pos_aft_2}{err_head}{$con_pos_aft_2} += $err_head ; + $counts{con_pos_aft_2}{err_dep}{$con_pos_aft_2} += $err_dep ; + $counts{con_pos_aft}{tot}{$con_pos_aft}++ ; + $counts{con_pos_aft}{err_head}{$con_pos_aft} += $err_head ; + $counts{con_pos_aft}{err_dep}{$con_pos_aft} += $err_dep ; + + $err = $head_err.$sep.$head_aft_bef.$sep.$dep_err ; + $freq_err{$err}++ ; + + } # loop on words + + foreach $i_w (0 .. 
$word_num) # including one for the virtual root + { # loop on words + if ($frames_g[$i_w] ne $frames_s[$i_w]) { + $counts{frame2}{"$frames_g[$i_w]/ $frames_s[$i_w]"}++ ; + } + } + + if (defined $opt_b) { # produce output similar to evalb + if ($word_num > 0) { + my ($unlabeled,$labeled) = ('NaN', 'NaN'); + if ($sent_counts{tot} > 0) { # there are scoring tokens + $unlabeled = 100-$sent_counts{err_head}*100.0/$sent_counts{tot}; + $labeled = 100-$sent_counts{err_any} *100.0/$sent_counts{tot}; + } + printf OUT " %4d %4d 0 %6.2f %6.2f %4d %4d %4d 0 0 0 0\n", + $sent_num, $word_num, + $unlabeled, $labeled, + $sent_counts{tot}-$sent_counts{err_head}, + $sent_counts{tot}-$sent_counts{err_any}, + $sent_counts{tot},; + } + } + +} # main reading loop + +################################################################################ +### printing output ### +################################################################################ + +if (defined $opt_b) { # produce output similar to evalb + print OUT "\n\n"; +} +printf OUT " Labeled attachment score: %d / %d * 100 = %.2f %%\n", + $counts{tot}-$counts{err_any}, $counts{tot}, 100-$counts{err_any}*100.0/$counts{tot} ; +printf OUT " Unlabeled attachment score: %d / %d * 100 = %.2f %%\n", + $counts{tot}-$counts{err_head}{tot}, $counts{tot}, 100-$counts{err_head}{tot}*100.0/$counts{tot} ; +printf OUT " Label accuracy score: %d / %d * 100 = %.2f %%\n", + $counts{tot}-$counts{err_dep}{tot}, $counts{tot}, 100-$counts{err_dep}{tot}*100.0/$counts{tot} ; + +if ($short_output) +{ + exit(0) ; +} +printf OUT "\n %s\n\n", '=' x 80 ; +printf OUT " Evaluation of the results in %s\n vs. gold standard %s:\n\n", $opt_s, $opt_g ; + +printf OUT " Legend: '%s' - the beginning of a sentence, '%s' - the end of a sentence\n\n", $START, $END ; + +printf OUT " Number of non-scoring tokens: $counts{punct}\n\n"; + +printf OUT " The overall accuracy and its distribution over CPOSTAGs\n\n" ; +printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; + +printf OUT " %-10s | %-5s | %-5s | %% | %-5s | %% | %-5s | %%\n", + 'Accuracy', 'words', 'right', 'right', 'both' ; +printf OUT " %-10s | %-5s | %-5s | | %-5s | | %-5s |\n", + ' ', ' ', 'head', ' dep', 'right' ; + +printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; + +printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", + 'total', $counts{tot}, + $counts{tot}-$counts{err_head}{tot}, 100-$counts{err_head}{tot}*100.0/$counts{tot}, + $counts{tot}-$counts{err_dep}{tot}, 100-$counts{err_dep}{tot}*100.0/$counts{tot}, + $counts{tot}-$counts{err_any}, 100-$counts{err_any}*100.0/$counts{tot} ; + +printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; + +foreach $pos (sort {$counts{pos}{$b}{tot} <=> $counts{pos}{$a}{tot}} keys %{$counts{pos}}) +{ + if (! defined($counts{pos}{$pos}{err_head}{tot})) + { + $counts{pos}{$pos}{err_head}{tot} = 0 ; + } + if (! defined($counts{pos}{$pos}{err_dep}{tot})) + { + $counts{pos}{$pos}{err_dep}{tot} = 0 ; + } + if (! 
defined($counts{pos}{$pos}{err_any})) + { + $counts{pos}{$pos}{err_any} = 0 ; + } + + printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", + $pos, $counts{pos}{$pos}{tot}, + $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_head}{tot}, 100-$counts{pos}{$pos}{err_head}{tot}*100.0/$counts{pos}{$pos}{tot}, + $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_dep}{tot}, 100-$counts{pos}{$pos}{err_dep}{tot}*100.0/$counts{pos}{$pos}{tot}, + $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_any}, 100-$counts{pos}{$pos}{err_any}*100.0/$counts{pos}{$pos}{tot} ; +} + +printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; + +printf OUT "\n\n" ; + +printf OUT " The overall error rate and its distribution over CPOSTAGs\n\n" ; +printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; + +printf OUT " %-10s | %-5s | %-5s | %% | %-5s | %% | %-5s | %%\n", + 'Error', 'words', 'head', ' dep', 'both' ; +printf OUT " %-10s | %-5s | %-5s | | %-5s | | %-5s |\n", + + 'Rate', ' ', 'err', ' err', 'wrong' ; + +printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; + +printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", + 'total', $counts{tot}, + $counts{err_head}{tot}, $counts{err_head}{tot}*100.0/$counts{tot}, + $counts{err_dep}{tot}, $counts{err_dep}{tot}*100.0/$counts{tot}, + $counts{err_both}, $counts{err_both}*100.0/$counts{tot} ; + +printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; + +foreach $pos (sort {$counts{pos}{$b}{tot} <=> $counts{pos}{$a}{tot}} keys %{$counts{pos}}) +{ + if (! defined($counts{pos}{$pos}{err_both})) + { + $counts{pos}{$pos}{err_both} = 0 ; + } + + printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", + $pos, $counts{pos}{$pos}{tot}, + $counts{pos}{$pos}{err_head}{tot}, $counts{pos}{$pos}{err_head}{tot}*100.0/$counts{pos}{$pos}{tot}, + $counts{pos}{$pos}{err_dep}{tot}, $counts{pos}{$pos}{err_dep}{tot}*100.0/$counts{pos}{$pos}{tot}, + $counts{pos}{$pos}{err_both}, $counts{pos}{$pos}{err_both}*100.0/$counts{pos}{$pos}{tot} ; + +} + +printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; + +### added by Sabine Buchholz +printf OUT "\n\n"; +printf OUT " Precision and recall of DEPREL\n\n"; +printf OUT " ----------------+------+---------+--------+------------+---------------\n"; +printf OUT " deprel | gold | correct | system | recall (%%) | precision (%%) \n"; +printf OUT " ----------------+------+---------+--------+------------+---------------\n"; +foreach my $dep (sort keys %{$counts{all_dep}}) { + # initialize + my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); + + if (defined($counts{dep2}{$dep}{$dep})) { + $tot_corr = $counts{dep2}{$dep}{$dep}; + } + if (defined($counts{dep}{$dep}{tot})) { + $tot_g = $counts{dep}{$dep}{tot}; + $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); + } + if (defined($counts{dep_s}{$dep}{tot})) { + $tot_s = $counts{dep_s}{$dep}{tot}; + $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); + } + printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", + $dep, $tot_g, $tot_corr, $tot_s, $rec, $prec; +} + +### DEPREL + ATTACHMENT: +### Same as Sabine's DEPREL apart from $tot_corr calculation +printf OUT "\n\n"; +printf OUT " Precision and recall of DEPREL + ATTACHMENT\n\n"; +printf OUT " ----------------+------+---------+--------+------------+---------------\n"; +printf OUT " deprel | gold | correct | system | 
recall (%%) | precision (%%) \n"; +printf OUT " ----------------+------+---------+--------+------------+---------------\n"; +foreach my $dep (sort keys %{$counts{all_dep}}) { + # initialize + my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); + + if (defined($counts{dep2}{$dep}{$dep})) { + if (defined($counts{err_head_corr_dep}{$dep})) { + $tot_corr = $counts{dep2}{$dep}{$dep} - $counts{err_head_corr_dep}{$dep}; + } else { + $tot_corr = $counts{dep2}{$dep}{$dep}; + } + } + if (defined($counts{dep}{$dep}{tot})) { + $tot_g = $counts{dep}{$dep}{tot}; + $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); + } + if (defined($counts{dep_s}{$dep}{tot})) { + $tot_s = $counts{dep_s}{$dep}{tot}; + $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); + } + printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", + $dep, $tot_g, $tot_corr, $tot_s, $rec, $prec; +} +### DEPREL + ATTACHMENT + +printf OUT "\n\n"; +printf OUT " Precision and recall of binned HEAD direction\n\n"; +printf OUT " ----------------+------+---------+--------+------------+---------------\n"; +printf OUT " direction | gold | correct | system | recall (%%) | precision (%%) \n"; +printf OUT " ----------------+------+---------+--------+------------+---------------\n"; +foreach my $dir ('to_root', 'left', 'right', 'self') { + # initialize + my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); + + if (defined($counts{dir2}{$dir}{$dir})) { + $tot_corr = $counts{dir2}{$dir}{$dir}; + } + if (defined($counts{dir_g}{$dir}{tot})) { + $tot_g = $counts{dir_g}{$dir}{tot}; + $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); + } + if (defined($counts{dir_s}{$dir}{tot})) { + $tot_s = $counts{dir_s}{$dir}{tot}; + $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); + } + printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", + $dir, $tot_g, $tot_corr, $tot_s, $rec, $prec; +} + +printf OUT "\n\n"; +printf OUT " Precision and recall of binned HEAD distance\n\n"; +printf OUT " ----------------+------+---------+--------+------------+---------------\n"; +printf OUT " distance | gold | correct | system | recall (%%) | precision (%%) \n"; +printf OUT " ----------------+------+---------+--------+------------+---------------\n"; +foreach my $dist ('to_root', '1', '2', '3-6', '7-...') { + # initialize + my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); + + if (defined($counts{dist2}{$dist}{$dist})) { + $tot_corr = $counts{dist2}{$dist}{$dist}; + } + if (defined($counts{dist_g}{$dist}{tot})) { + $tot_g = $counts{dist_g}{$dist}{tot}; + $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); + } + if (defined($counts{dist_s}{$dist}{tot})) { + $tot_s = $counts{dist_s}{$dist}{tot}; + $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); + } + printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", + $dist, $tot_g, $tot_corr, $tot_s, $rec, $prec; +} + +printf OUT "\n\n"; +printf OUT " Frame confusions (gold versus system; *...* marks the head token)\n\n"; +foreach my $frame (sort {$counts{frame2}{$b} <=> $counts{frame2}{$a}} keys %{$counts{frame2}}) +{ + if ($counts{frame2}{$frame} >= 5) # (make 5 a changeable threshold later) + { + printf OUT " %3d %s\n", $counts{frame2}{$frame}, $frame; + } +} +### end of: added by Sabine Buchholz + + +# +# Leave only the 5 words mostly involved in errors +# + + +$thresh = (sort {$b <=> $a} values %{$counts{word}{err_any}})[4] ; + +# ensure enough space for title +$max_word_len = length('word') ; + +foreach $word (keys %{$counts{word}{err_any}}) +{ + if ($counts{word}{err_any}{$word} < $thresh) + 
{ + delete $counts{word}{err_any}{$word} ; + next ; + } + + $l = uni_len($word) ; + if ($l > $max_word_len) + { + $max_word_len = $l ; + } +} + +# filter a case when the difference between the error counts +# for 2-word and 1-word contexts is small +# (leave the 2-word context) + +foreach $con (keys %{$counts{con_aft_2}{tot}}) +{ + ($w1) = split(/\+/, $con) ; + + if (defined $counts{con_aft}{tot}{$w1} && + $counts{con_aft}{tot}{$w1}-$counts{con_aft_2}{tot}{$con} <= 1) + { + delete $counts{con_aft}{tot}{$w1} ; + } +} + +foreach $con (keys %{$counts{con_bef_2}{tot}}) +{ + ($w_2, $w_1) = split(/\+/, $con) ; + + if (defined $counts{con_bef}{tot}{$w_1} && + $counts{con_bef}{tot}{$w_1}-$counts{con_bef_2}{tot}{$con} <= 1) + { + delete $counts{con_bef}{tot}{$w_1} ; + } +} + +foreach $con_pos (keys %{$counts{con_pos_aft_2}{tot}}) +{ + ($p1) = split(/\+/, $con_pos) ; + + if (defined($counts{con_pos_aft}{tot}{$p1}) && + $counts{con_pos_aft}{tot}{$p1}-$counts{con_pos_aft_2}{tot}{$con_pos} <= 1) + { + delete $counts{con_pos_aft}{tot}{$p1} ; + } +} + +foreach $con_pos (keys %{$counts{con_pos_bef_2}{tot}}) +{ + ($p_2, $p_1) = split(/\+/, $con_pos) ; + + if (defined($counts{con_pos_bef}{tot}{$p_1}) && + $counts{con_pos_bef}{tot}{$p_1}-$counts{con_pos_bef_2}{tot}{$con_pos} <= 1) + { + delete $counts{con_pos_bef}{tot}{$p_1} ; + } +} + +# for each context type, take the three contexts most involved in errors + +$max_con_len = 0 ; + +filter_context_counts($counts{con_bef_2}{tot}, $con_err_num, \$max_con_len) ; + +filter_context_counts($counts{con_bef}{tot}, $con_err_num, \$max_con_len) ; + +filter_context_counts($counts{con_aft}{tot}, $con_err_num, \$max_con_len) ; + +filter_context_counts($counts{con_aft_2}{tot}, $con_err_num, \$max_con_len) ; + +# for each CPOS context type, take the three CPOS contexts most involved in errors + +$max_con_pos_len = 0 ; + +$thresh = (sort {$b <=> $a} values %{$counts{con_pos_bef_2}{tot}})[$con_err_num-1] ; + +foreach $con_pos (keys %{$counts{con_pos_bef_2}{tot}}) +{ + if ($counts{con_pos_bef_2}{tot}{$con_pos} < $thresh) + { + delete $counts{con_pos_bef_2}{tot}{$con_pos} ; + next ; + } + if (length($con_pos) > $max_con_pos_len) + { + $max_con_pos_len = length($con_pos) ; + } +} + +$thresh = (sort {$b <=> $a} values %{$counts{con_pos_bef}{tot}})[$con_err_num-1] ; + +foreach $con_pos (keys %{$counts{con_pos_bef}{tot}}) +{ + if ($counts{con_pos_bef}{tot}{$con_pos} < $thresh) + { + delete $counts{con_pos_bef}{tot}{$con_pos} ; + next ; + } + if (length($con_pos) > $max_con_pos_len) + { + $max_con_pos_len = length($con_pos) ; + } +} + +$thresh = (sort {$b <=> $a} values %{$counts{con_pos_aft}{tot}})[$con_err_num-1] ; + +foreach $con_pos (keys %{$counts{con_pos_aft}{tot}}) +{ + if ($counts{con_pos_aft}{tot}{$con_pos} < $thresh) + { + delete $counts{con_pos_aft}{tot}{$con_pos} ; + next ; + } + if (length($con_pos) > $max_con_pos_len) + { + $max_con_pos_len = length($con_pos) ; + } +} + +$thresh = (sort {$b <=> $a} values %{$counts{con_pos_aft_2}{tot}})[$con_err_num-1] ; + +foreach $con_pos (keys %{$counts{con_pos_aft_2}{tot}}) +{ + if ($counts{con_pos_aft_2}{tot}{$con_pos} < $thresh) + { + delete $counts{con_pos_aft_2}{tot}{$con_pos} ; + next ; + } + if (length($con_pos) > $max_con_pos_len) + { + $max_con_pos_len = length($con_pos) ; + } +} + +# printing + +# ------------- focus words + +printf OUT "\n\n" ; +printf OUT " %d focus words where most of the errors occur:\n\n", scalar keys %{$counts{word}{err_any}} ; + +printf OUT " %-*s | %-4s | %-4s | %-4s | %-4s\n", $max_word_len, ' ', 
'any', 'head', 'dep', 'both' ; +printf OUT " %s-+------+------+------+------\n", '-' x $max_word_len; + +foreach $word (sort {$counts{word}{err_any}{$b} <=> $counts{word}{err_any}{$a}} keys %{$counts{word}{err_any}}) +{ + if (!defined($counts{word}{err_head}{$word})) + { + $counts{word}{err_head}{$word} = 0 ; + } + if (! defined($counts{word}{err_dep}{$word})) + { + $counts{word}{err_dep}{$word} = 0 ; + } + if (! defined($counts{word}{err_any}{$word})) + { + $counts{word}{err_any}{$word} = 0; + } + printf OUT " %-*s | %4d | %4d | %4d | %4d\n", + $max_word_len+length($word)-uni_len($word), $word, $counts{word}{err_any}{$word}, + $counts{word}{err_head}{$word}, + $counts{word}{err_dep}{$word}, + $counts{word}{err_dep}{$word}+$counts{word}{err_head}{$word}-$counts{word}{err_any}{$word} ; +} + +printf OUT " %s-+------+------+------+------\n", '-' x $max_word_len; + +# ------------- contexts + +printf OUT "\n\n" ; + +printf OUT " one-token preceeding contexts where most of the errors occur:\n\n" ; + +print_context($counts{con_bef}, $counts{con_pos_bef}, $max_con_len, $max_con_pos_len) ; + +printf OUT " two-token preceeding contexts where most of the errors occur:\n\n" ; + +print_context($counts{con_bef_2}, $counts{con_pos_bef_2}, $max_con_len, $max_con_pos_len) ; + +printf OUT " one-token following contexts where most of the errors occur:\n\n" ; + +print_context($counts{con_aft}, $counts{con_pos_aft}, $max_con_len, $max_con_pos_len) ; + +printf OUT " two-token following contexts where most of the errors occur:\n\n" ; + +print_context($counts{con_aft_2}, $counts{con_pos_aft_2}, $max_con_len, $max_con_pos_len) ; + +# ------------- Sentences + +printf OUT " Sentence with the highest number of word errors:\n" ; +$i = (sort { (defined($err_sent[$b]{word}) && $err_sent[$b]{word}) + <=> (defined($err_sent[$a]{word}) && $err_sent[$a]{word}) } 1 .. $sent_num)[0] ; +printf OUT " Sentence %d line %d, ", $i, $starts[$i-1] ; +printf OUT "%d head errors, %d dependency errors, %d word errors\n", + $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ; + +printf OUT "\n\n" ; + +printf OUT " Sentence with the highest number of head errors:\n" ; +$i = (sort { (defined($err_sent[$b]{head}) && $err_sent[$b]{head}) + <=> (defined($err_sent[$a]{head}) && $err_sent[$a]{head}) } 1 .. $sent_num)[0] ; +printf OUT " Sentence %d line %d, ", $i, $starts[$i-1] ; +printf OUT "%d head errors, %d dependency errors, %d word errors\n", + $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ; + +printf OUT "\n\n" ; + +printf OUT " Sentence with the highest number of dependency errors:\n" ; +$i = (sort { (defined($err_sent[$b]{dep}) && $err_sent[$b]{dep}) + <=> (defined($err_sent[$a]{dep}) && $err_sent[$a]{dep}) } 1 .. $sent_num)[0] ; +printf OUT " Sentence %d line %d, ", $i, $starts[$i-1] ; +printf OUT "%d head errors, %d dependency errors, %d word errors\n", + $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ; + +# +# Second pass, collect statistics of the frequent errors +# + +# filter the errors, leave the most frequent $freq_err_num errors + +$i = 0 ; + +$thresh = (sort {$b <=> $a} values %freq_err)[$freq_err_num-1] ; + +foreach $err (keys %freq_err) +{ + if ($freq_err{$err} < $thresh) + { + delete $freq_err{$err} ; + } +} + +# in case there are several errors with the threshold count + +$freq_err_num = scalar keys %freq_err ; + +%err_counts = () ; + +$eof = 0 ; + +seek (GOLD, 0, 0) ; +seek (SYS, 0, 0) ; + +while (! 
$eof) +{ # second reading loop + + $eof = read_sent(\@sent_gold, \@sent_sys) ; + $sent_num++ ; + + $word_num = scalar @sent_gold ; + + # printf "$sent_num $word_num\n" ; + + foreach $i_w (0 .. $word_num-1) + { # loop on words + ($word, $pos, $head_g, $dep_g) + = @{$sent_gold[$i_w]}{'word', 'pos', 'head', 'dep'} ; + + # printf "%d: %s %s %s %s\n", $i_w, $word, $pos, $head_g, $dep_g ; + + if ((! $score_on_punct) && is_uni_punct($word)) + { + # ignore punctuations + next ; + } + + ($head_s, $dep_s) = @{$sent_sys[$i_w]}{'head', 'dep'} ; + + $err_head = ($head_g ne $head_s) ; + $err_dep = ($dep_g ne $dep_s) ; + + $head_err = '-' ; + $dep_err = '-' ; + + if ($head_g eq '0') + { + $head_aft_bef_g = '0' ; + } + elsif ($head_g eq $i_w+1) + { + $head_aft_bef_g = 'e' ; + } + else + { + $head_aft_bef_g = ($head_g <= $i_w+1 ? 'b' : 'a') ; + } + + if ($head_s eq '0') + { + $head_aft_bef_s = '0' ; + } + elsif ($head_s eq $i_w+1) + { + $head_aft_bef_s = 'e' ; + } + else + { + $head_aft_bef_s = ($head_s <= $i_w+1 ? 'b' : 'a') ; + } + + $head_aft_bef = $head_aft_bef_g.$head_aft_bef_s ; + + if ($err_head) + { + if ($head_aft_bef_s eq '0') + { + $head_err = 0 ; + } + else + { + $head_err = $head_s-$head_g ; + } + } + + if ($err_dep) + { + $dep_err = $dep_g.'->'.$dep_s ; + } + + if (! ($err_head || $err_dep)) + { + next ; + } + + # handle only the most frequent errors + + $err = $head_err.$sep.$head_aft_bef.$sep.$dep_err ; + + if (! exists $freq_err{$err}) + { + next ; + } + + ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) = get_context(\@sent_gold, $i_w) ; + + $con_bef = $w_1 ; + $con_bef_2 = $w_2.' + '.$w_1 ; + $con_aft = $w1 ; + $con_aft_2 = $w1.' + '.$w2 ; + + $con_pos_bef = $p_1 ; + $con_pos_bef_2 = $p_2.'+'.$p_1 ; + $con_pos_aft = $p1 ; + $con_pos_aft_2 = $p1.'+'.$p2 ; + + @cur_err = ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) ; + + # printf "# %-25s %-15s %-10s %-25s %-3s %-30s\n", + # $con_bef, $word, $pos, $con_aft, $head_err, $dep_err ; + + @bits = (0, 0, 0, 0, 0, 0) ; + $j = 0 ; + + while ($j == 0) + { + for ($i = 0; $i <= $#bits; $i++) + { + if ($bits[$i] == 0) + { + $bits[$i] = 1 ; + $j = 0 ; + last ; + } + else + { + $bits[$i] = 0 ; + $j = 1 ; + } + } + + @e_bits = @cur_err ; + + for ($i = 0; $i <= $#bits; $i++) + { + if (! $bits[$i]) + { + $e_bits[$i] = '*' ; + } + } + + # include also the last case which is the most general + # (wildcards for everything) + $err_counts{$err}{join($sep, @e_bits)}++ ; + + } + + } # loop on words +} # second reading loop + +printf OUT "\n\n" ; +printf OUT " Specific errors, %d most frequent errors:", $freq_err_num ; +printf OUT "\n %s\n", '=' x 41 ; + + +# deleting local contexts which are too general + +foreach $err (keys %err_counts) +{ + foreach $loc_con (sort {$err_counts{$err}{$b} <=> $err_counts{$err}{$a}} + keys %{$err_counts{$err}}) + { + @cur_err = split(/\Q$sep\E/, $loc_con) ; + + # In this loop, one or two elements of the local context are + # replaced with '*' to make it more general. If the entry for + # the general context has the same count it is removed. + + foreach $i (0 .. 
$#cur_err) + { + $w1 = $cur_err[$i] ; + if ($cur_err[$i] eq '*') + { + next ; + } + $cur_err[$i] = '*' ; + $con1 = join($sep, @cur_err) ; + if ( defined($err_counts{$err}{$con1}) && defined($err_counts{$err}{$loc_con}) + && ($err_counts{$err}{$con1} == $err_counts{$err}{$loc_con})) + { + delete $err_counts{$err}{$con1} ; + } + for ($j = $i+1; $j <=$#cur_err; $j++) + { + if ($cur_err[$j] eq '*') + { + next ; + } + $w2 = $cur_err[$j] ; + $cur_err[$j] = '*' ; + $con1 = join($sep, @cur_err) ; + if ( defined($err_counts{$err}{$con1}) && defined($err_counts{$err}{$loc_con}) + && ($err_counts{$err}{$con1} == $err_counts{$err}{$loc_con})) + { + delete $err_counts{$err}{$con1} ; + } + $cur_err[$j] = $w2 ; + } + $cur_err[$i] = $w1 ; + } + } +} + +# Leaving only the topmost local contexts for each error + +foreach $err (keys %err_counts) +{ + $thresh = (sort {$b <=> $a} values %{$err_counts{$err}})[$spec_err_loc_con-1] || 0 ; + + # of the threshold is too low, take the 2nd highest count + # (the highest may be the total which is the generic case + # and not relevant for printing) + + if ($thresh < 5) + { + $thresh = (sort {$b <=> $a} values %{$err_counts{$err}})[1] ; + } + + foreach $loc_con (keys %{$err_counts{$err}}) + { + if ($err_counts{$err}{$loc_con} < $thresh) + { + delete $err_counts{$err}{$loc_con} ; + } + else + { + if ($loc_con ne join($sep, ('*', '*', '*', '*', '*', '*'))) + { + $loc_con_err_counts{$loc_con}{$err} = $err_counts{$err}{$loc_con} ; + } + } + } +} + +# printing an error summary + +# calculating the context field length + +$max_word_spec_len= length('word') ; +$max_con_aft_len = length('word') ; +$max_con_bef_len = length('word') ; +$max_con_pos_len = length('CPOS') ; + +foreach $err (keys %err_counts) +{ + foreach $loc_con (sort keys %{$err_counts{$err}}) + { + ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) = + split(/\Q$sep\E/, $loc_con) ; + + $l = uni_len($word) ; + if ($l > $max_word_spec_len) + { + $max_word_spec_len = $l ; + } + + $l = uni_len($con_bef) ; + if ($l > $max_con_bef_len) + { + $max_con_bef_len = $l ; + } + + $l = uni_len($con_aft) ; + if ($l > $max_con_aft_len) + { + $max_con_aft_len = $l ; + } + + if (length($con_pos_aft) > $max_con_pos_len) + { + $max_con_pos_len = length($con_pos_aft) ; + } + + if (length($con_pos_bef) > $max_con_pos_len) + { + $max_con_pos_len = length($con_pos_bef) ; + } + } +} + +$err_counter = 0 ; + +foreach $err (sort {$freq_err{$b} <=> $freq_err{$a}} keys %freq_err) +{ + + ($head_err, $head_aft_bef, $dep_err) = split(/\Q$sep\E/, $err) ; + + $err_counter++ ; + $err_desc{$err} = sprintf("%2d. ", $err_counter). 
+ describe_err($head_err, $head_aft_bef, $dep_err) ; + + # printf OUT " %-3s %-30s %d\n", $head_err, $dep_err, $freq_err{$err} ; + printf OUT "\n" ; + printf OUT " %s : %d times\n", $err_desc{$err}, $freq_err{$err} ; + + printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n", + '-' x $max_con_pos_len, '-' x $max_con_bef_len, + '-' x $max_pos_len, '-' x $max_word_spec_len, + '-' x $max_con_pos_len, '-' x $max_con_aft_len ; + + printf OUT " %-*s | %-*s | %-*s | %s\n", + $max_con_pos_len+$max_con_bef_len+3, ' Before', + $max_word_spec_len+$max_pos_len+3, ' Focus', + $max_con_pos_len+$max_con_aft_len+3, ' After', + 'Count' ; + + printf OUT " %-*s %-*s | %-*s %-*s | %-*s %-*s |\n", + $max_con_pos_len, 'CPOS', $max_con_bef_len, 'word', + $max_pos_len, 'CPOS', $max_word_spec_len, 'word', + $max_con_pos_len, 'CPOS', $max_con_aft_len, 'word' ; + + printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n", + '-' x $max_con_pos_len, '-' x $max_con_bef_len, + '-' x $max_pos_len, '-' x $max_word_spec_len, + '-' x $max_con_pos_len, '-' x $max_con_aft_len ; + + foreach $loc_con (sort {$err_counts{$err}{$b} <=> $err_counts{$err}{$a}} + keys %{$err_counts{$err}}) + { + if ($loc_con eq join($sep, ('*', '*', '*', '*', '*', '*'))) + { + next ; + } + + $con1 = $loc_con ; + $con1 =~ s/\*/ /g ; + + ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) = + split(/\Q$sep\E/, $con1) ; + + printf OUT " %-*s | %-*s | %-*s | %-*s | %-*s | %-*s | %3d\n", + $max_con_pos_len, $con_pos_bef, $max_con_bef_len+length($con_bef)-uni_len($con_bef), $con_bef, + $max_pos_len, $pos, $max_word_spec_len+length($word)-uni_len($word), $word, + $max_con_pos_len, $con_pos_aft, $max_con_aft_len+length($con_aft)-uni_len($con_aft), $con_aft, + $err_counts{$err}{$loc_con} ; + } + + printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n", + '-' x $max_con_pos_len, '-' x $max_con_bef_len, + '-' x $max_pos_len, '-' x $max_word_spec_len, + '-' x $max_con_pos_len, '-' x $max_con_aft_len ; + +} + +printf OUT "\n\n" ; +printf OUT " Local contexts involved in several frequent errors:" ; +printf OUT "\n %s\n", '=' x 51 ; +printf OUT "\n\n" ; + +foreach $loc_con (sort {scalar keys %{$loc_con_err_counts{$b}} <=> + scalar keys %{$loc_con_err_counts{$a}}} + keys %loc_con_err_counts) +{ + + if (scalar keys %{$loc_con_err_counts{$loc_con}} == 1) + { + next ; + } + + printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n", + '-' x $max_con_pos_len, '-' x $max_con_bef_len, + '-' x $max_pos_len, '-' x $max_word_spec_len, + '-' x $max_con_pos_len, '-' x $max_con_aft_len ; + + printf OUT " %-*s | %-*s | %-*s \n", + $max_con_pos_len+$max_con_bef_len+3, ' Before', + $max_word_spec_len+$max_pos_len+3, ' Focus', + $max_con_pos_len+$max_con_aft_len+3, ' After' ; + + printf OUT " %-*s %-*s | %-*s %-*s | %-*s %-*s \n", + $max_con_pos_len, 'CPOS', $max_con_bef_len, 'word', + $max_pos_len, 'CPOS', $max_word_spec_len, 'word', + $max_con_pos_len, 'CPOS', $max_con_aft_len, 'word' ; + + printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n", + '-' x $max_con_pos_len, '-' x $max_con_bef_len, + '-' x $max_pos_len, '-' x $max_word_spec_len, + '-' x $max_con_pos_len, '-' x $max_con_aft_len ; + + $con1 = $loc_con ; + $con1 =~ s/\*/ /g ; + + ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) = + split(/\Q$sep\E/, $con1) ; + + printf OUT " %-*s | %-*s | %-*s | %-*s | %-*s | %-*s \n", + $max_con_pos_len, $con_pos_bef, $max_con_bef_len+length($con_bef)-uni_len($con_bef), $con_bef, + $max_pos_len, $pos, $max_word_spec_len+length($word)-uni_len($word), $word, + $max_con_pos_len, $con_pos_aft, 
$max_con_aft_len+length($con_aft)-uni_len($con_aft), $con_aft ;
+
+    printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n",
+        '-' x $max_con_pos_len, '-' x $max_con_bef_len,
+        '-' x $max_pos_len, '-' x $max_word_spec_len,
+        '-' x $max_con_pos_len, '-' x $max_con_aft_len ;
+
+    foreach $err (sort {$loc_con_err_counts{$loc_con}{$b} <=>
+                        $loc_con_err_counts{$loc_con}{$a}}
+                  keys %{$loc_con_err_counts{$loc_con}})
+    {
+        printf OUT " %s : %d times\n", $err_desc{$err},
+            $loc_con_err_counts{$loc_con}{$err} ;
+    }
+
+    printf OUT "\n" ;
+}
+
+close GOLD ;
+close SYS ;
+
+close OUT ;
+
diff --git a/bcovington/src/utils/weights.clas b/bcovington/src/utils/weights.clas
new file mode 100644
index 0000000..eee7ac6
--- /dev/null
+++ b/bcovington/src/utils/weights.clas
@@ -0,0 +1,11 @@
+# Relations used to attach function words to content words
+aux 0.1
+case 0.1
+cc 0.1
+clf 0.1
+cop 0.1
+det 0.1
+mark 0.1
+
+# Punctuation
+punct 0
diff --git a/bcovington/utils.py b/bcovington/utils.py
new file mode 100644
index 0000000..40d3c27
--- /dev/null
+++ b/bcovington/utils.py
@@ -0,0 +1,269 @@
+from collections import Counter
+import codecs  # used by dump_lookup_extra_into_conll and transform_to_single_root below
+import re
+
+"""
+This is a module slightly extended from the original utils module of the BIST-Parser:
+https://github.com/elikip/bist-parser/blob/master/barchybrid/src/utils.py
+
+It has been adapted to support non-projective transition-based dependency parsing
+and CoNLL-U dependencies.
+"""
+
+# Column indexes and tag values used by get_rooted and transform_to_single_root below.
+# They are assumed to follow the standard CoNLL-U column layout (they are not
+# defined elsewhere in this module).
+UD_ID_COLUMN = 0
+UD_CTAG_COLUMN = 3
+UD_HEAD_COLUMN = 6
+UD_CTAG_VERB = "VERB"
+DUMMY_ROOT = 0
+
+
+class CovingtonConfiguration(object):
+    """
+    Nivre, J. (2008). Algorithms for deterministic incremental dependency parsing. Computational Linguistics, 34(4), 513-553.
+
+    l1: Word Id of the word at the top of the lambda one list
+    b: Word Id of the word at the top of the buffer
+    sentence: List of ConllEntry
+    A: set of created arcs (tuples (headID, dependentID))
+    """
+
+    def __init__(self, l1, b, sentence, A):
+
+        self.l1 = l1
+        self.b = b
+        self.sentence = sentence
+        self.A = A
+
+    def __str__(self):
+        return str(self.l1)+" "+str(self.b)+" "+str(self.A)
+
+
+class ConllEntry(object):
+    """
+    Contains the information of a line in a CoNLL-X file.
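+
+    For illustration only (the token values below are hypothetical), a CoNLL-X line such as
+
+        1   El   el   d   da0000   _   2   det   _   _
+
+    (columns: id, form, lemma, cpostag, postag, feats, head, deprel, phead, pdeprel)
+    is turned by read_conll below into ConllEntry(1, 'El', 'el', 'd', 'da0000', '_', 2, 'det').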
+ """ + + def __init__(self, id, form, lemma, cpos, pos, feats, + parent_id=None, relation=None): + + self.id = id + self.form = form + self.lemma = normalize(lemma) + self.norm = normalize(form) + self.cpos = cpos + self.pos = pos + self.feats = feats + self.parent_id = parent_id + self.relation = relation + + #By default everything is assigned to a dummy root + self.pred_parent_id = 0 + self.pred_relation = 'root' + + #For debugging + def __str__(self): + return "["+'\,'.join(map(str,[self.id,self.form,self.lemma,self.norm,self.cpos,self.pos,self.feats,self.parent_id,self.relation]))+"]" + + + +def vocab(conll_path): + + wordsCount = Counter() + lemmasCount = Counter() + cposCount = Counter() + posCount = Counter() + featsCount = Counter() + relCount = Counter() + + with open(conll_path, 'r') as conllFP: + for sentence in read_conll(conllFP): + + wordsCount.update([node.norm for node in sentence]) + lemmasCount.update([node.lemma for node in sentence]) + cposCount.update([node.cpos for node in sentence]) + posCount.update([node.pos for node in sentence]) + featsCount.update([node.feats for node in sentence]) + relCount.update([node.relation for node in sentence]) + + return (wordsCount, {w: i for i, w in enumerate(wordsCount.keys())}, + lemmasCount, {l: i for i, l in enumerate(lemmasCount.keys())}, + cposCount.keys(), posCount.keys(), featsCount.keys(), + relCount.keys()) + + +def read_conll(fh): + """ + Reads a ConLL file given a file object fh + """ + + non_proj_sentences = 0 + read = 0 + tokens_read = 0 + root = ConllEntry(0, '*root*', '*root-lemma*', 'ROOT-POS', 'ROOT-CPOS','FEATS-ROOT', 0, 'rroot') + tokens = [root] + for line in fh: + + if line.startswith('#'): continue + tok = line.strip().split('\t') + if not tok or tok == ['']: #If it is empty line + if len(tokens)>1: + yield tokens + read += 1 + tokens = [root] + id = 0 + else: + try: + if "." in tok[0] or "-" in tok[0]: continue + tokens.append(ConllEntry(int(tok[0]), tok[1], tok[2] ,tok[3], + tok[4], tok[5], int(tok[6]) if tok[6] != '_' else -1 , tok[7])) + tokens_read+=1 + + except IndexError: + pass + + #Last sentence + if len(tokens) > 1: + yield tokens + print read, 'sentences read.' + print tokens_read ,'tokens read' + + +def write_conll(fn, conll_gen): + """ + Writes a CoNLL file + """ + with open(fn, 'w') as fh: + for sentence in conll_gen: + for entry in sentence[1:]: + fh.write('\t'.join([str(entry.id), entry.form, entry.lemma, entry.cpos, entry.pos, entry.feats, str(entry.pred_parent_id), entry.pred_relation, '_', '_'])) + fh.write('\n') + fh.write('\n') + + + +numberRegex = re.compile("[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+"); +def normalize(word): + return 'NUM' if numberRegex.match(word) else word.lower() + + + + +""" +Looks for multiword expressions in the CoNLL file and creates a lookup table that +allows to reconstruct then the output +""" +def lookup_conll_extra_data(fh): + + lookup = {} + sentence_id = 0 + lookup[sentence_id] = {} + id_insert_before = 1 + + for line in fh: + + if line.startswith('#'): continue + tok = line.strip().split('\t') + + if not tok or tok == ['']: #If it is empty line + sentence_id+=1 + id_insert_before = 1 + lookup[sentence_id] = {} + else: + if "." 
in tok[0] or "-" in tok[0]: + lookup[sentence_id][id_insert_before] = line + else: + id_insert_before+=1 + + return lookup + +""" +dumps the content of the lookup table extracted by lookup_conll_extra_data +into a output conll_path +""" +def dump_lookup_extra_into_conll(conll_path,lookup): + + sentence_id = 0 + word_id = 1 + + with codecs.open(conll_path) as f_conll: + lines = f_conll.readlines() + + #DUMPING the content of the file + f_conll = codecs.open(conll_path,"w") + + for line in lines: + + tok = line.strip().split('\t') + if tok == ['']: #If it is empty line + sentence_id+=1 + word_id = 1 + else: + if sentence_id in lookup: + if word_id in lookup[sentence_id]: + f_conll.write(lookup[sentence_id][word_id]) + word_id+=1 + f_conll.write(line) + + f_conll.close() + + +def get_rooted(conll_str): + """ + Returns a list of [id,ctag,head] of the nodes rooted to 0 + """ + rooted_elements = [] + + lines = conll_str.split('\n') + for l in lines: + ls = l.split('\t') + try: + identifier,tag,head = int(ls[UD_ID_COLUMN]),ls[UD_CTAG_COLUMN],int(ls[UD_HEAD_COLUMN]) + if head == DUMMY_ROOT: + rooted_elements.append((identifier,tag,head)) + except ValueError: + pass + return rooted_elements + + +def get_new_single_root(lmultiple_rooted): + """ + Returns the ID of the first VERB rooted to 0 or the leftmost rooted + element otherwise + """ + for e in lmultiple_rooted: + if e[2] == DUMMY_ROOT and e[1] == UD_CTAG_VERB: + return e[0] + return lmultiple_rooted[0][0] + +""" +""" +def transform_to_single_root(conll_path): + + with codecs.open(conll_path) as f_conll: + sentences = f_conll.read().split('\n\n') + + with codecs.open(conll_path,"w") as f_conll: + + i=0 + for s in sentences: + if s == "": continue + rooted = get_rooted(s) + if len(rooted) > 1: + frv = get_new_single_root(rooted) + for l in s.split('\n'): + ls = l.strip().split('\t') + + if ls != [''] and not l.startswith("#"): #If it is empty line + if ls[UD_HEAD_COLUMN] != "_" and int(ls[UD_HEAD_COLUMN]) == DUMMY_ROOT and int(ls[UD_ID_COLUMN]) != frv: + ls[UD_HEAD_COLUMN] = str(frv) + + f_conll.write('\t'.join(ls)+"\n") + else: + f_conll.write(s+"\n") + f_conll.write('\n') + i+=1 + + + + + + + + + + + + + From 9a4ccd95be46b42e4c96a369e8ed70ff3a7474cb Mon Sep 17 00:00:00 2001 From: "david.vilares" Date: Wed, 24 May 2017 16:16:19 +0200 Subject: [PATCH 2/4] bist-covington: a non-projective transition-based BIST-parser --- bcovington/covington.py~ | 875 --------------------------------------- 1 file changed, 875 deletions(-) delete mode 100644 bcovington/covington.py~ diff --git a/bcovington/covington.py~ b/bcovington/covington.py~ deleted file mode 100644 index ec982a9..0000000 --- a/bcovington/covington.py~ +++ /dev/null @@ -1,875 +0,0 @@ -from dynet import * -from utils_bcovington import read_conll, write_conll, CovingtonConfiguration -from operator import itemgetter -from itertools import chain -from tarjan import tarjan -import time, random -import numpy as np -import os -import warnings - - -""" -This is a module extended from original the transition-based BIST-Parser barchybrid: - -https://github.com/elikip/bist-parser/blob/master/barchybrid/ -Kiperwasser, E., & Goldberg, Y. (2016). Simple and accurate dependency parsing using bidirectional LSTM feature representations. arXiv preprint arXiv:1603.04351. 
- - -that has been adapted to include to support non-projective transition-based dependency parsing -using an implementation (O(n^2)) of the traditional Covington's (2001) algorithm, according -to the list-based transition-based described in Nivre (2008). - -Covington, M. A. (2001). A fundamental algorithm for dependency parsing. In Proceedings of the 39th annual ACM southeast conference (pp. 95-102). -Nivre, J. (2008). Algorithms for deterministic incremental dependency parsing. Computational Linguistics, 34(4), 513-553. - -We also include the O(n) dynamic oracle described in Gomez-Rodriguez and Fernandez-Gonzalez (2015). -TODO: Current implementation is O(n^2) - -Gomez-Rodriguez, C., & Fernandez-Gonzalez, D. (2015). An efficient dynamic oracle for unrestricted non-projective parsing. Volume 2: Short Papers, 256. - -""" - - - -class CovingtonBILSTM: - - #ACTIVATION FUNCTIONS - TANH = 'tanh' - SIGMOID = 'sigmoid' - RELU = 'relu' - TANH3 = 'tanh3' - - #OPTIMIZERS - SGD="sgd" - MOMENTUM="momentum" - ADAGRAD="adagrad" - ADADELTA="adadelta" - ADAM = "adam" - - #SPECIAL INDEXES - INDEX_WORD_PAD = 1 - INDEX_WORD_INITIAL = 2 - INDEX_POS_PAD = 1 - INDEX_POS_INITIAL = 2 - INIT_WORD_INDEX = 3 - INIT_POS_INDEX = INIT_WORD_INDEX - - INDEX_FEATS_PAD = 1 - INDEX_FEATS_INITIAL= 2 - INIT_FEATS_INDEX = INIT_WORD_INDEX - - #TRANSITIONS - LEFT_ARC = 0 - RIGHT_ARC = 1 - SHIFT = 2 - NO_ARC = 3 - TRANSITIONS = [LEFT_ARC, RIGHT_ARC, SHIFT, NO_ARC] - - #OTHER HYPERPARAMETERS - SIZE_TRANSITIONS = len(TRANSITIONS) - - def __init__(self, words, lemmas, cpos, pos, feats, rels, w2i, l2i, options, path_oov_external_embedding=None, - pretrained=False): - - self.model = Model() - if options.optimizer == self.ADAM: - self.trainer = AdamTrainer(self.model) - elif options.optimizer == self.SGD: - self.trainer = SimpleSGDTrainer(self.model) - elif options.optimizer == self.MOMENTUM: - self.trainer = MomentumSGDTrainer(self.model) - elif options.optimizer == self.ADAGRAD: - self.trainer = AdagradTrainer(self.model) - elif options.optimizer == self.ADADELTA: - self.trainer = AdadeltaTrainer(self.model) - else: - raise NotImplementedError("Selected optimizer is not available") - - random.seed(1) - - self.activations = {self.TANH: tanh, - self.SIGMOID: logistic, - self.RELU: rectify, - self.TANH3: (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))} - - self.activation = self.activations[options.activation] - - self.oracle = options.oracle - - - self.ldims = options.lstm_dims * 2 #*2 because it is a bi-lstm - self.wdims = options.wembedding_dims - self.pdims = options.pembedding_dims - self.rdims = options.rembedding_dims - self.layers = options.lstm_layers - self.wordsCount = words - - self.vocab = {word: ind+self.INIT_WORD_INDEX for word, ind in w2i.iteritems()} - self.lemmas = {lemma: ind+self.INIT_WORD_INDEX for lemma,ind in l2i.iteritems()} - self.cpos = {cpos: ind+self.INIT_POS_INDEX for ind, cpos in enumerate(cpos)} - self.pos = {pos: ind+self.INIT_POS_INDEX for ind, pos in enumerate(pos)} - self.feats = {f: ind+self.INIT_FEATS_INDEX for ind, f in enumerate(feats)} - self.rels = {word: ind for ind, word in enumerate(rels)} - - #List of dependency types - self.irels = rels - - self.headFlag = options.headFlag - self.rlMostFlag = options.rlMostFlag - self.rlFlag = options.rlFlag - self.kb = options.window_b - self.kl1 = options.window_l1 - self.kl2_r = options.window_l2r - self.kl2_l = options.window_l2l - - self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0) - - #Reading external 
embedding files, if they exists - - #INFORMATION FOR EXTERNAL WORD EMBEDDINGS - self.external_embedding = None - self.edim = None - self.noextrn = None - self.extrnd = None - self.elookup = None - if options.external_embedding is not None and os.path.exists(options.external_embedding): - self.external_embedding, self.edim,self.noextrn,self.extrnd, self.elookup = self._assign_external_embeddings(options.external_embedding, - self.INDEX_WORD_PAD, self.INDEX_WORD_INITIAL) - else: - warnings.warn("Not using any external file for FORM embeddings") - - #INFORMATION FOR THE EXTERNAL CPOSTAG EMBEDDINGS - self.cpos_external_embedding = None - self.cpos_edim = None - self.cpos_noextrn = None - self.cpos_extrnd = None - self.cpos_elookup = None - if options.cpos_external_embedding is not None and os.path.exists(options.cpos_external_embedding): - self.cpos_external_embedding, self.cpos_edim,self.cpos_noextrn,self.cpos_extrnd, self.cpos_elookup = self._assign_external_embeddings(options.cpos_external_embedding, - self.INDEX_POS_PAD, self.INDEX_POS_INITIAL) - else: - warnings.warn("Not using any external file for CPOSTAG embeddings") - - #INFORMATION FOR THE EXTERNAL POSTAG EMBEDDINGS - self.pos_external_embedding = None - self.pos_edim = None - self.pos_noextrn = None - self.pos_extrnd = None - self.pos_elookup= None - if options.pos_external_embedding is not None and os.path.exists(options.pos_external_embedding): - self.pos_external_embedding, self.pos_edim,self.pos_noextrn,self.pos_extrnd, self.pos_elookup = self._assign_external_embeddings(options.pos_external_embedding, - self.INDEX_POS_PAD, self.INDEX_POS_INITIAL) - else: - warnings.warn("Not using any external file for POSTAG embeddings") - - #INFORMATION FOR THE EXTERNAL FEATS EMBEDDINGS - self.feats_external_embedding = None - self.feats_edim = None - self.feats_noextrn = None - self.feats_extrnd = None - self.feats_elookup= None - - if options.feats_external_embedding is not None and os.path.exists(options.feats_external_embedding): - self.feats_external_embedding, self.feats_edim,self.feats_noextrn,self.feats_extrnd, self.feats_elookup = self._assign_external_embeddings(options.feats_external_embedding, self.INDEX_FEATS_PAD, self.INDEX_FEATS_INITIAL) - else: - warnings.warn("Not using any external file for FEATS embeddings") - - - #INFORMATION FOR THE EXTERNAL FEATS EMBEDDINGS -# self.lemmas_external_embedding = None -# self.lemmas_edim = None -# self.lemmas_noextrn = None -# self.lemmas_extrnd = None -# self.lemmas_elookup= None - -# if options.lemmas_external_embedding is not None and os.path.exists(options.lemmas_external_embedding): -# self.lemmas_external_embedding, self.lemmas_edim,self.lemmas_noextrn,self.lemmas_extrnd, self.lemmas_elookup = self._assign_external_embeddings(options.lemmas_external_embedding, self.INDEX_FEATS_PAD, self.INDEX_FEATS_INITIAL) -# else: -# warnings.warn("Not using any external file for LEMMAS embeddings") - - - - - self.oov_external_embedding = None - self.oov_edim = None - self.oov_noextrn = None - self.oov_extrnd = None - self.oov_elookup = None - - - if path_oov_external_embedding is not None and os.path.exists(options.feats_external_embedding): - self.oov_external_embedding, self.oov_edim,self.oov_noextrn,self.oov_extrnd, self.oov_elookup = self._assign_external_embeddings(path_oov_external_embedding, - self.INDEX_WORD_PAD, self.INDEX_WORD_INITIAL) - - if self.oov_external_embedding is not None and self.oov_edim != self.edim: - raise ValueError("The dimensions of the embeddings for OOV words is not 
equal to the dimension of the rest of external word embeddings (self.oov_edim != self.edim)") - - #Obtaining the dimension of the input - dims = (self.wdims + self.pdims + (self.edim if self.external_embedding is not None else 0) + - (self.cpos_edim if self.cpos_external_embedding is not None else 0) + - (self.pos_edim if self.pos_external_embedding is not None else 0)+ - (self.feats_edim if self.feats_external_embedding is not None else 0) -# + -# (self.lemmas_edim if self.lemmas_external_embedding is not None else 0) - ) - - - #Initialization of the architecture - - self.blstmFlag = options.blstmFlag - self.bibiFlag = options.bibiFlag - - if self.bibiFlag: - self.surfaceBuilders = [VanillaLSTMBuilder(1, dims, self.ldims * 0.5, self.model), - VanillaLSTMBuilder(1, dims, self.ldims * 0.5, self.model)] - self.bsurfaceBuilders = [VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model), - VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model)] - elif self.blstmFlag: - if self.layers > 0: - self.surfaceBuilders = [VanillaLSTMBuilder(self.layers, dims, self.ldims * 0.5, self.model), LSTMBuilder(self.layers, dims, self.ldims * 0.5, self.model)] - else: - self.surfaceBuilders = [SimpleRNNBuilder(1, dims, self.ldims * 0.5, self.model), LSTMBuilder(1, dims, self.ldims * 0.5, self.model)] - - - self.hidden_units = options.hidden_units - self.hidden2_units = options.hidden2_units - self.vocab['*PAD*'] = self.INDEX_WORD_PAD - self.cpos['*PAD*'] = self.INDEX_POS_PAD - self.feats['*PAD*'] = self.INDEX_FEATS_PAD - - self.vocab['*INITIAL*'] = self.INDEX_WORD_INITIAL - self.cpos['*INITIAL*'] = self.INDEX_POS_INITIAL - self.feats['*INITIAL*'] = self.INDEX_FEATS_INITIAL - - self.wlookup = self.model.add_lookup_parameters((len(words) + self.INIT_WORD_INDEX, self.wdims)) - self.plookup = self.model.add_lookup_parameters((len(cpos) + self.INIT_POS_INDEX, self.pdims)) - self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims)) - - - self.word2lstm = self.model.add_parameters((self.ldims, dims)) - - self.word2lstmbias = self.model.add_parameters((self.ldims)) - self.lstm2lstm = self.model.add_parameters((self.ldims, self.ldims * self.nnvecs + self.rdims)) - self.lstm2lstmbias = self.model.add_parameters((self.ldims)) - - self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * self.nnvecs * (self.kl1 + self.kl2_l + self.kl2_r + self.kb))) - self.hidBias = self.model.add_parameters((self.hidden_units)) - - self.hid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units)) - self.hid2Bias = self.model.add_parameters((self.hidden2_units)) - - self.outLayer = self.model.add_parameters((self.SIZE_TRANSITIONS, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units)) - self.outBias = self.model.add_parameters((self.SIZE_TRANSITIONS)) - - self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * self.nnvecs * (self.kl1 + self.kl2_l + self.kl2_r + self.kb))) - self.rhidBias = self.model.add_parameters((self.hidden_units)) - - self.rhid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units)) - self.rhid2Bias = self.model.add_parameters((self.hidden2_units)) - - self.routLayer = self.model.add_parameters((2 * (len(self.irels) + 0) + 1, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units)) - self.routBias = self.model.add_parameters((2 * (len(self.irels) + 0) + 1)) - - self.pretrained = pretrained - - - def _assign_external_embeddings(self,option_external_embedding, - index_pad,index_initial): 
- """ - Reads an external embedding file - Returns: - external_embedding: A dictionary of key:embedding - edim: Dimension of the embedding - noextrn: ?? - extrnd: Index for each key - elookup: Parameter lookup - """ - - - if option_external_embedding is not None: - - external_embedding_fp = open(option_external_embedding,'r') - external_embedding_fp.readline() - - external_embedding = {line.split(' ')[0] : [float(f) for f in line.strip().split(' ')[1:]] - for line in external_embedding_fp} - - - external_embedding_fp.close() - - edim = len(external_embedding.values()[0]) - noextrn = [0.0 for _ in xrange(edim)] - extrnd = {element: i + self.INIT_POS_INDEX - for i, element in enumerate(external_embedding)} - elookup = self.model.add_lookup_parameters((len(external_embedding) + self.INIT_WORD_INDEX, edim)) - - for element, i in extrnd.iteritems(): - elookup.init_row(i, external_embedding[element]) - extrnd['*PAD*'] = index_pad - extrnd['*INITIAL*'] = index_initial - - return external_embedding, edim, noextrn, extrnd, elookup - - return None,None,None,None,None - - - - def __evaluate(self, c, train): - """ - @param c: A CovingtonConfiguration instance - @param train: True if used in the training phase, False otherwise - Returns the scores for all possible transitions (training) - or the top ones (testing) for a given configuration c - """ - - #Gets the embeddings for the terms to be used in the prediction - top_l1 = [c.sentence[c.l1-i].lstms if c.l1 - i > 0 else [self.empty] for i in xrange(self.kl1)] - top_l2l = [c.sentence[c.l1+1+i].lstms if c.l1+1+i < c.b else [self.empty] for i in xrange(self.kl2_l)] - top_l2r = [c.sentence[c.b-i].lstms if c.b-i > c.l1 else [self.empty] for i in xrange(self.kl2_r)] - topBuffer = [c.sentence[c.b+i-1].lstms if c.b+i-1 <= c.sentence[-1].id else [self.empty] for i in xrange(self.kb)] - - input = concatenate(list(chain(*(top_l1 + top_l2l + top_l2r + topBuffer)))) - - if self.hidden2_units > 0: - routput = (self.routLayer.expr() * self.activation(self.rhid2Bias.expr() + self.rhid2Layer.expr() * self.activation(self.rhidLayer.expr() * input + self.rhidBias.expr())) + self.routBias.expr()) - else: - routput = (self.routLayer.expr() * self.activation(self.rhidLayer.expr() * input + self.rhidBias.expr()) + self.routBias.expr()) - - if self.hidden2_units > 0: - output = (self.outLayer.expr() * self.activation(self.hid2Bias.expr() + self.hid2Layer.expr() * self.activation(self.hidLayer.expr() * input + self.hidBias.expr())) + self.outBias.expr()) - else: - output = (self.outLayer.expr() * self.activation(self.hidLayer.expr() * input + self.hidBias.expr()) + self.outBias.expr()) - - scrs, uscrs = routput.value(), output.value() - - if train: - left_arc_info = [(rel,self.LEFT_ARC, scrs[1+j*2] + uscrs[self.LEFT_ARC], routput[1+j*2]+ output[self.LEFT_ARC]) - for j, rel in enumerate(self.irels) if c.l1 > 0 and c.l1 < c.b and c.b <= c.sentence[-1].id] - - right_arc_info = [(rel,self.RIGHT_ARC, scrs[2+j*2] + uscrs[self.RIGHT_ARC], routput[2+j*2]+ output[self.RIGHT_ARC]) - for j, rel in enumerate(self.irels) if c.l1 >= 0 and c.l1 < c.b and c.b <= c.sentence[-1].id] - - shift_info = [ (None, self.SHIFT, scrs[0] + uscrs[self.SHIFT], routput[0] + output[self.SHIFT]) ] if c.b <= c.sentence[-1].id else [] - - no_arc_info = [(None, self.NO_ARC,scrs[3] + uscrs[self.NO_ARC], routput[3] + output[self.NO_ARC] )] if c.l1> 0 and c.b <= c.sentence[-1].id else [] - - ret = [left_arc_info,right_arc_info, shift_info, no_arc_info] - - else: - #It is done different from the 'train' phase, due 
to the dynamic oracle. - #In the test phase we already pick the most likely transition/dependency instead of returning them all - #and then selecting one according to the prediction of the dynamic oracle - sLEFT,rLEFT = max(zip(scrs[1::2],self.irels)) - sRIGHT,rRIGHT = max(zip(scrs[2::2],self.irels)) - sLEFT += uscrs[self.LEFT_ARC] - sRIGHT += uscrs[self.RIGHT_ARC] - ret = [ [(rLEFT, self.LEFT_ARC, sLEFT) ] if (c.l1 > 0 and c.l1 < c.b and c.b <= c.sentence[-1].id and self._is_valid_left_arc(c)) else [], - [(rRIGHT, self.RIGHT_ARC, sRIGHT) ] if (c.l1 >= 0 and c.l1 < c.b and c.b <= c.sentence[-1].id and self._is_valid_right_arc(c)) else [], - [(None, self.SHIFT, scrs[0] + uscrs[self.SHIFT]) ] if (c.b <= c.sentence[-1].id) else [], - [(None, self.NO_ARC,scrs[3] + uscrs[self.NO_ARC]) ] if (c.l1 > 0 and c.b <= c.sentence[-1].id) else [] - ] - return ret - - - def Save(self, filename): - self.model.save(filename) - - - def Load(self, filename): - self.model.load(filename) - - def Init(self): - evec = self.elookup[1] if self.external_embedding is not None else None - cpos_evec = self.cpos_elookup[1] if self.cpos_external_embedding is not None else None - pos_evec = self.pos_elookup[1] if self.pos_external_embedding is not None else None - feats_evec = self.feats_elookup[1] if self.feats_external_embedding is not None else None - # lemmas_evec = self.lemmas_elookup[1] if self.lemmas_external_embedding is not None else None - paddingWordVec = self.wlookup[1] - paddingPosVec = self.plookup[1] if self.pdims > 0 else None - # paddingVec = tanh(self.word2lstm.expr() * concatenate(filter(None, [paddingWordVec, paddingPosVec, evec, cpos_evec, pos_evec, feats_evec, lemmas_evec])) + self.word2lstmbias.expr()) - paddingVec = tanh(self.word2lstm.expr() * concatenate(filter(None, [paddingWordVec, paddingPosVec, evec, cpos_evec, pos_evec, feats_evec])) + self.word2lstmbias.expr()) - self.empty = paddingVec if self.nnvecs == 1 else concatenate([paddingVec for _ in xrange(self.nnvecs)]) - - - def getWordEmbeddings(self, sentence, train): - """ - Gets the embeddings (also external) for every term in a sentence - Returns a vector of all embeddings concatenated - """ - - for root in sentence: - c = float(self.wordsCount.get(root.norm, 0)) - dropFlag = not train or (random.random() < (c/(0.25+c))) - sys.stdout.flush() - root.wordvec = self.wlookup[int(self.vocab.get(root.norm, 0)) if dropFlag else 0] - root.cposvec = self.plookup[int(self.cpos.get(root.cpos,0))] if self.pdims > 0 else None - - #For word embeddings - if self.external_embedding is not None: - if root.form in self.external_embedding: - root.evec = self.elookup[self.extrnd[root.form]] - elif root.norm in self.external_embedding: - root.evec = self.elookup[self.extrnd[root.norm]] - else: - if (self.oov_external_embedding is not None and root.form.replace(" ","_") in self.oov_external_embedding): - root.evec = self.oov_elookup[self.oov_extrnd[root.form.replace(" ","_")]] - else: - root.evec = self.elookup[0] - else: - root.evec = None - - #For cpostag embeddings - if self.cpos_external_embedding is not None: - if root.cpos in self.cpos_external_embedding: - root.cposevec = self.cpos_elookup[self.cpos_extrnd[root.cpos]] - else: - root.cposevec = self.cpos_elookup[0] - else: - root.cposevec = None - - #For postag embeddings - if self.pos_external_embedding is not None: - if root.pos in self.pos_external_embedding: - root.posevec = self.pos_elookup[self.pos_extrnd[root.pos]] - else: - root.posevec = self.pos_elookup[0] - else: - root.posevec = None -# - #For 
feats embeddings - if self.feats_external_embedding is not None: - if root.feats in self.feats_external_embedding: - root.featsevec = self.feats_elookup[self.feats_extrnd[root.feats]] - else: - root.featsevec = self.feats_elookup[0] - else: - root.featsevec = None - - - #For lemmas embeddings -# if self.lemmas_external_embedding is not None: -# if root.lemma in self.lemmas_external_embedding: -# root.lemmasevec = self.lemmas_elookup[self.lemmas_extrnd[root.lemma]] -# else: -# root.lemmasevec = self.lemmas_elookup[0] -# else: -# root.lemmasevec = None - - - # root.ivec = concatenate(filter(None, [root.wordvec, root.cposvec, root.evec, root.cposevec, root.posevec, root.featsevec, root.lemmasevec])) - root.ivec = concatenate(filter(None, [root.wordvec, root.cposvec, root.evec, root.cposevec, root.posevec, root.featsevec])) - - if self.blstmFlag: - forward = self.surfaceBuilders[0].initial_state() - backward = self.surfaceBuilders[1].initial_state() - - for froot, rroot in zip(sentence, reversed(sentence)): - forward = forward.add_input( froot.ivec ) - backward = backward.add_input( rroot.ivec ) - froot.fvec = forward.output() - rroot.bvec = backward.output() - for root in sentence: - root.vec = concatenate( [root.fvec, root.bvec] ) - - if self.bibiFlag: - bforward = self.bsurfaceBuilders[0].initial_state() - bbackward = self.bsurfaceBuilders[1].initial_state() - - for froot, rroot in zip(sentence, reversed(sentence)): - bforward = bforward.add_input( froot.vec ) - bbackward = bbackward.add_input( rroot.vec ) - froot.bfvec = bforward.output() - rroot.bbvec = bbackward.output() - for root in sentence: - root.vec = concatenate( [root.bfvec, root.bbvec] ) - - else: - for root in sentence: - root.ivec = (self.word2lstm.expr() * root.ivec) + self.word2lstmbias.expr() - root.vec = tanh( root.ivec ) - - - def Predict(self, conll_path): - """ - Makes non-projective depending parsing prediction given a ConLL-X file - """ - - - with open(conll_path, 'r') as conllFP: - for iSentence, sentence in enumerate(read_conll(conllFP)): - self.Init() - - l1 = sentence[0].id - b = sentence[1].id - arcs = set([]) - - self.getWordEmbeddings(sentence, False) - - for root in sentence: - root.lstms = [root.vec for _ in xrange(self.nnvecs)] - - hoffset = 1 if self.headFlag else 0 - - c = CovingtonConfiguration(l1,b,sentence,arcs) - while not self._is_final_state(b,sentence): - - transition_scores = self.__evaluate(c, False) - - - best = max(chain(*transition_scores), key = itemgetter(2) ) - - if best[1] == self.LEFT_ARC: - - sentence[l1].pred_parent_id = sentence[b].id - sentence[l1].pred_relation = best[0] - best_op = self.LEFT_ARC - if self.rlMostFlag: - sentence[b].lstms[best_op+hoffset] = sentence[l1].lstms[best_op+hoffset] - if self.rlFlag: - sentence[b].lstms[best_op+hoffset] = sentence[l1].vec - - arcs.add((b,l1)) - l1 = l1 -1 - - elif best[1] == self.RIGHT_ARC: - - sentence[b].pred_parent_id = sentence[l1].id - sentence[b].pred_relation = best[0] - - best_op = self.RIGHT_ARC - if self.rlMostFlag: - sentence[l1].lstms[best_op+hoffset] = sentence[b].lstms[best_op+hoffset] - if self.rlFlag: - sentence[l1].lstms[best_op+hoffset] = sentence[b].vec - - arcs.add((l1,b)) - l1 = l1-1 - - elif best[1] == self.SHIFT: - l1 = b - b = b + 1 - - - elif best[1] == self.NO_ARC: - l1 = l1 - 1 - - c = CovingtonConfiguration(l1,b,sentence,arcs) - renew_cg() - yield sentence - - - def Train(self, conll_path): - """ - Trains a O(n^2) Covington's parser with a O(n^2) dynamic oracle - """ - mloss = 0.0 - errors = 0 - batch = 0 - eloss = 
0.0 - eerrors = 0 - lerrors = 0 - etotal = 0 - ltotal = 0 - ninf = -float('inf') - - hoffset = 1 if self.headFlag else 0 - - start = time.time() - - with open(conll_path, 'r') as conllFP: - shuffledData = list(read_conll(conllFP)) - - random.shuffle(shuffledData) - - - errs = [] - eeloss = 0.0 - - self.Init() - - for iSentence, sentence in enumerate(shuffledData): - if iSentence % 100 == 0 and iSentence != 0: - print 'Processing sentence number:', iSentence, 'Loss:', eloss / etotal, 'Errors:', (float(eerrors)) / etotal, 'Labeled Errors:', (float(lerrors) / etotal) , 'Time', time.time()-start - start = time.time() - eerrors = 0 - eloss = 0.0 - etotal = 0 - lerrors = 0 - ltotal = 0 - - self.getWordEmbeddings(sentence, True) - #We obtain the gold arcs to then compute the dynamic oracle for covington - gold_arcs = set([]) - for word in sentence: - - #TODO: Weird error if not, adds and arc (0,0) - if word.id != word.parent_id: - gold_arcs.add((word.parent_id,word.id)) - - - l1 = sentence[0].id - b = sentence[1].id - arcs = set([]) - c = CovingtonConfiguration(l1,b,sentence,arcs) - loss_c = self._loss(c,gold_arcs, iSentence) - - for word in sentence: - word.lstms = [word.vec for _ in xrange(self.nnvecs)] - - hoffset = 1 if self.headFlag else 0 - - while not self._is_final_state(b,sentence): - - costs = [None,None,None,None] - transition_scores = self.__evaluate(c, True) - - #We determine if the transitions are valid for a given configuration c - for t in self.TRANSITIONS: - - l1_aux = l1 - b_aux = b - arcs_aux = set(arcs) - valid_transition = False - - if t == self.LEFT_ARC and self._is_valid_left_arc(c): - arcs_aux.add((b_aux,l1_aux)) - l1_aux = l1_aux -1 - valid_transition = True - - if t == self.RIGHT_ARC and l1 >=0 and self._is_valid_right_arc(c): - arcs_aux.add((l1_aux,b_aux)) - l1_aux = l1_aux-1 - valid_transition = True - - if t == self.NO_ARC and l1 >0: - l1_aux = l1_aux-1 - valid_transition = True - - if t == self.SHIFT: - l1_aux = b_aux - b_aux = b_aux + 1 - valid_transition = True - - if valid_transition: - - new_c = CovingtonConfiguration(l1_aux,b_aux,sentence,arcs_aux) - loss_new_c = self._loss(new_c,gold_arcs,iSentence) - - cost = loss_new_c - loss_c - costs[t] = float(cost) - - #Valid transitions are those with cost 0 - #If it is a LEFT/RIGHT arc, also the relation must match with the one in gold standard - valid_transitions = [s for s in chain(*transition_scores) if costs[s[1]] == 0 and (s[1] in [self.SHIFT,self.NO_ARC] - or ((s[1] == self.LEFT_ARC and s[0] == sentence[l1].relation) - or (s[1] == self.RIGHT_ARC and s[0] == sentence[b].relation)))] - - best_valid = max(valid_transitions, key=itemgetter(2)) - - wrong_transitions = [s for s in chain(*transition_scores) if costs[s[1]] is not None and ( (costs[s[1]] != 0) or (s[1] in [self.LEFT_ARC,self.RIGHT_ARC] - and ((s[1] == self.LEFT_ARC and s[0] != sentence[l1].relation) - or (s[1] == self.RIGHT_ARC and s[0] != sentence[b].relation))) ) ] - - #Aggressive exploration as done by Kiperwasser and Golberg (2016) - if wrong_transitions != []: - best_wrong = max(wrong_transitions, key=itemgetter(2)) - - best = best_valid if ( (not self.oracle) or (best_valid[2] - best_wrong[2] > 1.0) - or (best_valid[2] > best_wrong[2] and random.random() > 0.1) ) else best_wrong - else: - best = best_valid - - - #Moving a new configuration based on the "best" choice - if best[1] == self.LEFT_ARC: - - sentence[l1].pred_parent_id = sentence[b].id - sentence[l1].pred_relation = best[0] - - best_op = self.LEFT_ARC - if self.rlMostFlag: - 
sentence[b].lstms[best_op+hoffset] = sentence[l1].lstms[best_op+hoffset] - if self.rlFlag: - sentence[b].lstms[best_op+hoffset] = sentence[l1].vec - - child = sentence[l1] - arcs.add((b,l1)) - l1 = l1 -1 - - elif best[1] == self.RIGHT_ARC: - - - sentence[b].pred_parent_id = sentence[l1].id - sentence[b].pred_relation = best[0] - - best_op = self.RIGHT_ARC - if self.rlMostFlag: - sentence[l1].lstms[best_op+hoffset] = sentence[b].lstms[best_op+hoffset] - if self.rlFlag: - sentence[l1].lstms[best_op+hoffset] = sentence[b].vec - - arcs.add((l1,b)) - child = sentence[b] - l1 = l1-1 - - - elif best[1] == self.SHIFT: - l1 = b - child = sentence[b] - b = b + 1 - - - elif best[1] == self.NO_ARC: - l1 = l1 - 1 - child = sentence[l1] - - - if best_valid[2] < best_wrong[2] + 1.0: - loss = best_wrong[3] - best_valid[3] - mloss += 1.0 + best_wrong[2] - best_valid[2] - eloss += 1.0 + best_wrong[2] - best_valid[2] - errs.append(loss) - - - if best[1] not in [self.SHIFT, self.NO_ARC] and (child.pred_parent_id != child.parent_id or child.pred_relation != child.relation): - lerrors += 1 - if child.pred_parent_id != child.parent_id: - errors += 1 - eerrors += 1 - - etotal += 1 - c = CovingtonConfiguration(l1,b,sentence,arcs) - loss_c = self._loss(c,gold_arcs, iSentence) - - - if len(errs) > 50: - eerrs = esum(errs) - scalar_loss = eerrs.scalar_value() - eerrs.backward() - self.trainer.update() - errs = [] - lerrs = [] - - renew_cg() - self.Init() - - if len(errs) > 0: - eerrs = (esum(errs)) # * (1.0/(float(len(errs)))) - eerrs.scalar_value() - eerrs.backward() - self.trainer.update() - - errs = [] - lerrs = [] - - renew_cg() - - self.trainer.update_epoch() - print "Loss: ", mloss/iSentence - - - def _is_final_state(self,b,sentence): - return b >= len(sentence) - - - def _is_valid_left_arc(self,c): - - aux = set(c.A) - aux.add((c.b,c.l1)) - l1_has_head = self._y_has_head(c.A, c.b, c.l1) - return (c.l1 > 0 and not l1_has_head - and self._count_cycles(aux) == 0) - - - def _is_valid_right_arc(self,c): - - b_has_head = self._y_has_head(c.A, c.l1, c.b) - aux = set(c.A) - aux.add((c.l1,c.b)) - return ((not b_has_head) and self._count_cycles(aux) == 0) - - - """ - Gomez-Rodriguez & Fernandez-Gonzalez: - An Efficiente Dynamic Oracle for Unrestricted Non-Projective Parsing (ACL,2015) - Algorithm 1 - """ - def _loss(self, c, gold_arcs, iSentence): - - U = set([]) #set of unreachable nodes - non_built_arcs = gold_arcs.difference(c.A) - - - i = c.l1 - j = c.b - - for x,y in non_built_arcs: - left = min(x,y) #O(n) - right = max(x,y) #O(n) - if (j > right or (j==right and i < left) or self._y_has_head(c.A,x,y) - or self._weakly_connected(c.A, x, y,c, gold_arcs)): - U.add((x,y)) - - I = gold_arcs.difference(U) - - return len(U) + self._count_cycles( c.A.union(I)) - - - #TODO: This can be done more efficient - #O(n^2) - def _weakly_connected(self,A,x,y,c, gold_arcs): - - weakly_connected = False - end_path = False - parent = x - - while parent != 0 and not weakly_connected and not end_path and A != set([]): - if (parent,y) in A: - weakly_connected = True - break - else: - - for (a,b) in A: - if b == parent: - parent = a - break - else: - end_path = True - - - return weakly_connected - - - """ - Tarjan (1972) implementation at https://github.com/bwesterb/py-tarjan/ - O(n) - """ - def _count_cycles(self, A): - - d = {} - for a,b in A: - if a not in d: - d[a] = [b] - else: - d[a].append(b) - - return sum([1 for e in tarjan(d) if len(e) > 1]) - - - """ - Determines if node y has already a head - """ - #O(n) - def 
_y_has_head(self,A,x,y): - - for z,y_prime in A: - if y_prime == y and z != x: - return True - return False - - #O(n) -# def violates_single_root(self, A): -# print A,[1 for (h,d) in A if h==0], len([1 for (h,d) in A if h==0]) != 0 -# return len([1 for (h,d) in A if h==0]) != 0 - From 9edba125f7f72d97ec74a894f01e15e51e52d48b Mon Sep 17 00:00:00 2001 From: "david.vilares" Date: Wed, 24 May 2017 16:17:55 +0200 Subject: [PATCH 3/4] bist-covington: a non-projective transition-based BIST-parser --- bcovington/parser.py~ | 0 bcovington/src/parser.pyc | Bin 6247 -> 0 bytes bcovington/src/utils.pyc | Bin 8418 -> 0 bytes bcovington/utils.py | 269 -------------------------------------- 4 files changed, 269 deletions(-) delete mode 100644 bcovington/parser.py~ delete mode 100644 bcovington/src/parser.pyc delete mode 100644 bcovington/src/utils.pyc delete mode 100644 bcovington/utils.py diff --git a/bcovington/parser.py~ b/bcovington/parser.py~ deleted file mode 100644 index e69de29..0000000 diff --git a/bcovington/src/parser.pyc b/bcovington/src/parser.pyc deleted file mode 100644 index e309ab49c6418551993256ceed56deaca7be950c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6247 zcma)AU3(M96`qwXgTW+D2sQ))n*;|#A_b2il*|oBn{_^}aX#1HI|fi(a+wIkS=mWZK#wotgKXGv|8-#{O~W z!0CH)A2&qtGmY zJt4e_KD|$P`}*{L;qC9!lfs+q(+7lipidtZ-a#R!L^vk4OCta0q;$l6A-qFE9ulp` zM2Hh{5cESWv_VNp+;_&D0)`^M2N|9MQ0fDR7=QvuJkId6E%yY&Pcm>=gind^2(!+B zX}qsvmYI$+(=m(bX`AUdGo4_jXDp_ZHq$9)a+&E_i|IL=>3L>4%}jFzljZ6Q44+|m zeiS~-@HvKG9ED$ExWaIC6s|FRp5cW8rqC_g23`>1%R){w>nj|tR{;-lanXWbWB7F& ze#3&_9EC4g@LLRDwx!>;;CDvPzGA^w8NOyqU$@{ThL>&lh6TUN@J$*~E z$*^z3-?HF<;SC#Zh%gkPWXm@RmWOS4%YvJuaKvznrMDS=%9f5TxIGFd7Mza4nFZe) zg>%a>MR2I?vpbe_cT~D(!8;GZM|gV82stBKvzW5` zgghhUypU&wgpwJtiELMi-}w*&`TdW=FJVUb9kOLiw9d0AST;DGff1Jbbl*Ib{ukZwn{*9k`2b6vcA6N3NPFp5= zEy$J3SM`4E`F=Zy65m(*u&7j`BoSPpR#V2An!(mupqs8vUEP!}!|rAj zOHC?5xfyg~t)@U1>U_PMNqwNNikIp#mg{;qmTtQFkRJDwJm`JVNRv43eCE#Md)MtK z>GmR=TB&5Y45NlVt0wq}EP`?}mQSnRrO-|Fg?t4_g3@%5A=Tp`QG-a0mbo(e z4h`UH7B#kGSrnw)(Rreu1PV&#Nf7(8y&=OeO19KtY%dkt?r^&(J{I*GS*r9b7>V%r zP;wmDwY79z2U~6*wv|zeD;1-RbWsC5LKS8Izi1*K`M6*qq(N9)54zIjJriI0SK&q@?ILVCO{#|5G-+=TIj%|WR!Y%u-w&g< zibCuaq0NRO#5r6VPO`k<%pc?&E}2uA5|LTjXqpl+WJHx)fXyfjF|^25L;9U0(n?Vu zR4Qg`QB!OS+Tam3grdPvscdiXtm{QdnD+dQh$dnmTHuB%pm-q~Bg}N$_F~RW(FYgv z0T4e3#D%svXG3Bc~Re&ZIb2x*Z3*=r*2MwDiq7Dv`g``jrZ8 zQ>IxL^Kjn3Bo3n|+)bpeC@DiO6(SO}A6!bv( zUAjh4D%EOLts)d3ryaN5!IXtLuEdQebGK3%nC+&6<$xp?-T7SzHiCrZ+9;(ci{)7} zYIBLfi!#=w)#p*qfafOKEDGZ2bK2&7fUg)JZ*bl) zGjkT=uI1R_X7PFD^Zi`h1T-_3c4VW&+#H{Ui@xll7-(i@NQ$fSMh$qZYP4U?Vz;v8 za$mU>K(1Fkl?OvrimrmSKSF&do|m2D_ytJvsijE+G=W5ZN!0cIT+?M zS98!C*ElwS~VlZ6Dg}%+rH}Ycj~U zkPwAeBkj*qn$7wDr#0J#N5t)BovqP}it zK6&h?X_w`=oa@x{yu>>%G_Lm+BR(jW?RH?cVr-F0eh!ddbd}TL<{~C2mzX|8q#5oT zB3fyb)M-F*AmJPa>So4J*lA~)?!IgNmbiLzW&Q4LKDU$LZMj{)0Ju(rlRJOY=i>Cy zA^aYR({#I&6`SW8u%H0s7qMZnrU0&)!mK4YB!1l+`zT1NY^$XPmfmI z6t&@tsYc_;>|&^}&6n_q>csLgzb5_)jnmH5=o1n=;D=gxZF5D-$Fwdc;c zckbMu^Yfi^?iBvfU;Tdc%uQG2zXI;B<4XUC!o$CzLZw=US~iryx~W1_Et~QTbU`f_ zl!vmYyrNp}QC^Q)E-A01mV2eWth`?3S5(tbURiyh)IFtcE?1RT>9kh&S`R7j5G%`l z%Ig!NUwQpP99F)m)(4b#Slz>7jwo+H+6R?)M2I2f4GM8oc|$@RQ{GV_hLv|rh!HM- zu{MmY1^>dYW`N3iQOw3$QRXKL0ce#KQM`N6ej_M>2*{Fb1jTH8JBU`ZIGT;4X0W=I zG_oL$_E^Wk-oTYwD17V!J4Ojs88SQgWC~GK_F|2rrq}RJ)xvKfxSjap*4vX-?Mr9Q zymD&Nnh96qB*@lUsnv`V%k#0kRuBbg7PwXrxryKMqpT6)N!yP+KXUI_?M9Mfg_G87 z+-h%SvOM&-jJ7R#F&($gzF2$l)VOu-g|idq&Yhi<*`)|yIBzY*iD%7wR@}6*HQ(B1 zwUG&(#qCaW*l4YI4J(d(E5xw!_R9bJ^2%1T=_m3|iVa}h{&}o0%{V$38-^F7EV&~c 
zX3krwFAuQYM&@HNiJMxrY^xpmxUcyQZ+>pP!+~sm?$jxH(d_|ud6~Fo$>|!3yN0@} zlmo&P0@Z>Ls1}8Qh!hlrl3b`!3o;H6zL2qHB`c%sELxgoYMey$CH2FmO`+O+&t|(# z_2I<2NWTnfYAtU0Q(ohC;7#5R!bak!Q>#HXwE_-IP=0UZZ7NONsjVys)5-Q7 znci`r1DWGIg|SkuN0}9ae=e8Ei))ZAPl3p=S*6wu8V%I&cwRSIKfbS_APtK(Zg)Yp zX6gNNv@}ch?MybsiEJ9q0d9}OISwXsoM$;J*P;x~VeZs%qDISioUAI9R_twstm^l? z*mWG6{p>?1y6Y0^QF(83F~p6O+b@+%W~Eow(bZTN59CLdy@4y`F8uvCAV^6x0D=N9 zH5&8~4X88eYbZp0F=*BZqErkoh+zLMv1OV_12zZ43nEan@xtZH6F0185c=AZ-3rFw z#r4kON>dd7srb-q=suKy22r4z!zS`7OhH>ikGhM0bSSC2_y^Ie?&2RrS>44yhziw; z1Q1XJ){hg*xryA!LHMR#y4Y4@Jf^csiXYmA! zQ5Ii7(N(KQ)+nx)CYo|CRiF(I;7?mKO0>8go94ms#v z5!t%PYh8r3E|OZWP)zGpDG#aDin^DhKtUbX3aAbLJu$pn(jOOv*8TS|vLkxWev*u8-P@{3$)H0hKv#A53J`3<~R2qC_!6a*#R%cK>f`w^!|aCW7y!#m?3L zq;?@;gIcf(TQ8$&K}bHkAJ(Nux_qd+6du~8Bwc=?yA&VVrB}NA zUU%tvXqU2d`KRtudT5u5bUAKtxtrJ3(h{KLt2eMWjg>%L?(-12$gShTrVxF80N&ym zHu$+`*p1YIXE}kD=@!f{M6(R6qIzrPPh9G00To4E+`F5E;-)W4z*V(IotjAwt5n;;{? z$F}a}?z#nHY}B=_#t3J{wIby69G4McM!Vi>dk6t-L78KyKL|C1XK@IAwAJzvoBLw= z8~&ZNMxM!-G8|k&wkA~uxT(bc8G{CKJb9glORuA!60G+s98LUB_Zd8jt}qypN#1-% zVnA6#6np-`aZ>!>?bvOsyot$D3ab+Yit{jsjHBjBqw_amju~Ud3GGa{E25%{EBzTn z3|N6evy?n#SCB}17}%AECLIl`cqk-%cm(u92+@~*n>VkjFw%yplHXAFh&IpzDd0v?B|m5NU4j-G zUdHM;T#@}TGuK<@9CtourpTcTHM&t!>7SV}cI(Ms)wW&=hGC=@!@ubAWeNVQ)!??z zh>($>6|b!OZe}&tq&Kh&FVlifGYC7K(mx70A^Lt=(g^$N)oT;)TwP45AyS!TjjI0U z#hGi16KoUBOE`+4!^xBwQGx3MLXwo$Nhv1+Mxc^A2ZYjqL7U{$^4O95JD|0sbf2B9 zDz#|!lPRq+i#L44z-(Bk(Ex4s6uV8cMv|r5NRMSn$QgpRrrKeUNt6--MY!oko9VGM zA%2m!=aDY&T*N*S2>=DIn`;@PzbKBJb`q~UdkYc+<{AVc^qL&j?ITa? z7&6LecPSJ~f_t6CmHrY11lSEQeoQ3T`5qnIT>U!=fUvX8Tmc^Vsk!=w008_f>=fP= z0SrPP5+ya8!a50onvoA~6GPD$k&Y=%&_Hn>o0U)dlT$o+AUul5GxQ z)7w8Zx_uxcIKh5nD0F^GzBVIqKib8t`jJfrYbU_*p^wR(gHs1lM_IG>ksn|RF>O^G zf({HCH*lpV(fZz*iC62--_^HUlb@@yGG2e)rS5fu&S zouh&R`Yl|)BI%wi3C?%LPi%kH-x0PVlXJC|hViza=p#2G0)6knvBN%!A-{_&&jn;h zG)`U=MEYA4z`{Lm2U-Vl(@<@y`6t}!T>azR$F0LTN{XCZP@5-n_XB?16Ba;s!4A|~`VJPS?yXPM36MEKeJ^2;wMXziWb~lX|wE4Y^MWV8V zXN!V|I=vR{wqNEJ1&-}kK-#aeIFCXkV^6dC6&4rRMyA;0ir`lm#~WL1kzNP(m`)Sl z0uSd+IFrhp6(8BUZ@%LA`o>`TQIMJPV{4RS!q`{?6=V_ zmg|Ob>TAb4jyC%mi|c$!?4|iqOGr2KFrB>D!#xf)8N=w_R;%sgx}voNPCeGs{~FJ` zx`HTps4xU=8OEVX6&|tAID#m+3T<&|C)AZaZ1^cEtAY)H02;m=n9>o7DbLlvL4<&K z_)LyXpc;3sOKOgk9nyqkAQX7RQN%Z-iqcjvp_WJj!0x6r3>- z|1E8f?S+g#R16u%5HgPCMql6#soZ%Om~ABpkQ#Cd&$Gez0&m>S8mr?x7_WDZ>zREZ zj6&3g1N1Dm&dA%^32H6TLMAfUGbl1fh}Y+w`8j9y>gDTK7UVDu4YSu~Et&rz9eO+u>)ee5SvkS%3NH={GEh zLd=~AW4R={$!N#GAH;naH%{4JGZ-cmBrKR>Tsl{O&d{kZ96trO zsy43@q@jYacFX}ghk3)KLBk&~g*vtfya{u|jdOhyM&8<|v8mMQi|knD*KbU3 zP7rp8M~68-zOdo}w@^~YaWgi5qL!k;yaRUHY0ux#E{X9;5278!Cs#X%cH)?5whI!G zRjW3=lBkzNDBYmEn!757FcP2F7~&K7Ok!ivc|bEws=$84wWVcWi5b8ubY4^k{6+fwsUtp%0+!iFVn;Zn!0pSe4DkobxVB4BGN5G1{s!oGZ7Dt9h#ww$gXDa>pic+nN LRZdifDt-S3NY$g~ diff --git a/bcovington/utils.py b/bcovington/utils.py deleted file mode 100644 index 40d3c27..0000000 --- a/bcovington/utils.py +++ /dev/null @@ -1,269 +0,0 @@ -from collections import Counter -import re - -""" -This is a module slightly extended from original utils in BIST-Parser: -https://github.com/elikip/bist-parser/blob/master/barchybrid/src/utils.py - -that has been adapted to include to support non-projective transition-based dependency parsing -and CoNLLU dependencies. -""" - -class CovingtonConfiguration(object): - """ - Nivre, J. (2008). Algorithms for deterministic incremental dependency parsing. Computational Linguistics, 34(4), 513-553. 
- - l1: Word Id of the word at the top of the lambda one list - b: Word Id of the word at the top of the buffer - sentence: List of ConllEntry - A: set of created arcs (tuples (headID,dependentID)) - """ - - def __init__(self,l1,b,sentence, A): - - self.l1 = l1 - self.b = b - self.sentence = sentence - self.A = A - - def __str__(self): - return str(self.l1)+" "+str(self.b)+" "+str(self.A) - - -class ConllEntry(object): - """ - Contains the information of a line in a CoNLL-X file. - """ - - def __init__(self, id, form, lemma, cpos, pos, feats, - parent_id=None, relation=None): - - self.id = id - self.form = form - self.lemma = normalize(lemma) - self.norm = normalize(form) - self.cpos = cpos - self.pos = pos - self.feats = feats - self.parent_id = parent_id - self.relation = relation - - #By default everything is assigned to a dummy root - self.pred_parent_id = 0 - self.pred_relation = 'root' - - #For debugging - def __str__(self): - return "["+'\,'.join(map(str,[self.id,self.form,self.lemma,self.norm,self.cpos,self.pos,self.feats,self.parent_id,self.relation]))+"]" - - - -def vocab(conll_path): - - wordsCount = Counter() - lemmasCount = Counter() - cposCount = Counter() - posCount = Counter() - featsCount = Counter() - relCount = Counter() - - with open(conll_path, 'r') as conllFP: - for sentence in read_conll(conllFP): - - wordsCount.update([node.norm for node in sentence]) - lemmasCount.update([node.lemma for node in sentence]) - cposCount.update([node.cpos for node in sentence]) - posCount.update([node.pos for node in sentence]) - featsCount.update([node.feats for node in sentence]) - relCount.update([node.relation for node in sentence]) - - return (wordsCount, {w: i for i, w in enumerate(wordsCount.keys())}, - lemmasCount, {l: i for i, l in enumerate(lemmasCount.keys())}, - cposCount.keys(), posCount.keys(), featsCount.keys(), - relCount.keys()) - - -def read_conll(fh): - """ - Reads a ConLL file given a file object fh - """ - - non_proj_sentences = 0 - read = 0 - tokens_read = 0 - root = ConllEntry(0, '*root*', '*root-lemma*', 'ROOT-POS', 'ROOT-CPOS','FEATS-ROOT', 0, 'rroot') - tokens = [root] - for line in fh: - - if line.startswith('#'): continue - tok = line.strip().split('\t') - if not tok or tok == ['']: #If it is empty line - if len(tokens)>1: - yield tokens - read += 1 - tokens = [root] - id = 0 - else: - try: - if "." in tok[0] or "-" in tok[0]: continue - tokens.append(ConllEntry(int(tok[0]), tok[1], tok[2] ,tok[3], - tok[4], tok[5], int(tok[6]) if tok[6] != '_' else -1 , tok[7])) - tokens_read+=1 - - except IndexError: - pass - - #Last sentence - if len(tokens) > 1: - yield tokens - print read, 'sentences read.' 
- print tokens_read ,'tokens read' - - -def write_conll(fn, conll_gen): - """ - Writes a CoNLL file - """ - with open(fn, 'w') as fh: - for sentence in conll_gen: - for entry in sentence[1:]: - fh.write('\t'.join([str(entry.id), entry.form, entry.lemma, entry.cpos, entry.pos, entry.feats, str(entry.pred_parent_id), entry.pred_relation, '_', '_'])) - fh.write('\n') - fh.write('\n') - - - -numberRegex = re.compile("[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+"); -def normalize(word): - return 'NUM' if numberRegex.match(word) else word.lower() - - - - -""" -Looks for multiword expressions in the CoNLL file and creates a lookup table that -allows to reconstruct then the output -""" -def lookup_conll_extra_data(fh): - - lookup = {} - sentence_id = 0 - lookup[sentence_id] = {} - id_insert_before = 1 - - for line in fh: - - if line.startswith('#'): continue - tok = line.strip().split('\t') - - if not tok or tok == ['']: #If it is empty line - sentence_id+=1 - id_insert_before = 1 - lookup[sentence_id] = {} - else: - if "." in tok[0] or "-" in tok[0]: - lookup[sentence_id][id_insert_before] = line - else: - id_insert_before+=1 - - return lookup - -""" -dumps the content of the lookup table extracted by lookup_conll_extra_data -into a output conll_path -""" -def dump_lookup_extra_into_conll(conll_path,lookup): - - sentence_id = 0 - word_id = 1 - - with codecs.open(conll_path) as f_conll: - lines = f_conll.readlines() - - #DUMPING the content of the file - f_conll = codecs.open(conll_path,"w") - - for line in lines: - - tok = line.strip().split('\t') - if tok == ['']: #If it is empty line - sentence_id+=1 - word_id = 1 - else: - if sentence_id in lookup: - if word_id in lookup[sentence_id]: - f_conll.write(lookup[sentence_id][word_id]) - word_id+=1 - f_conll.write(line) - - f_conll.close() - - -def get_rooted(conll_str): - """ - Returns a list of [id,ctag,head] of the nodes rooted to 0 - """ - rooted_elements = [] - - lines = conll_str.split('\n') - for l in lines: - ls = l.split('\t') - try: - identifier,tag,head = int(ls[UD_ID_COLUMN]),ls[UD_CTAG_COLUMN],int(ls[UD_HEAD_COLUMN]) - if head == DUMMY_ROOT: - rooted_elements.append((identifier,tag,head)) - except ValueError: - pass - return rooted_elements - - -def get_new_single_root(lmultiple_rooted): - """ - Returns the ID of the first VERB rooted to 0 or the leftmost rooted - element otherwise - """ - for e in lmultiple_rooted: - if e[2] == DUMMY_ROOT and e[1] == UD_CTAG_VERB: - return e[0] - return lmultiple_rooted[0][0] - -""" -""" -def transform_to_single_root(conll_path): - - with codecs.open(conll_path) as f_conll: - sentences = f_conll.read().split('\n\n') - - with codecs.open(conll_path,"w") as f_conll: - - i=0 - for s in sentences: - if s == "": continue - rooted = get_rooted(s) - if len(rooted) > 1: - frv = get_new_single_root(rooted) - for l in s.split('\n'): - ls = l.strip().split('\t') - - if ls != [''] and not l.startswith("#"): #If it is empty line - if ls[UD_HEAD_COLUMN] != "_" and int(ls[UD_HEAD_COLUMN]) == DUMMY_ROOT and int(ls[UD_ID_COLUMN]) != frv: - ls[UD_HEAD_COLUMN] = str(frv) - - f_conll.write('\t'.join(ls)+"\n") - else: - f_conll.write(s+"\n") - f_conll.write('\n') - i+=1 - - - - - - - - - - - - - From 70baf9eea6bb26e02ace485e61582901a6171fa8 Mon Sep 17 00:00:00 2001 From: "david.vilares" Date: Wed, 24 May 2017 16:19:09 +0200 Subject: [PATCH 4/4] bist-covington: a non-projective transition-based BIST-parser --- bcovington/src/covington.pyc | Bin 24143 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 
bcovington/src/covington.pyc

diff --git a/bcovington/src/covington.pyc b/bcovington/src/covington.pyc
deleted file mode 100644
index 7c52ffdf5bb5b35491481800b0996b6ade7e7316..0000000000000000000000000000000000000000
GIT binary patch
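The Predict() and Train() loops above drive the four list-based Covington transitions (LEFT_ARC, RIGHT_ARC, SHIFT, NO_ARC) described in Nivre (2008). The following is a minimal standalone sketch of that transition system for reference; the function name apply_transition and the toy derivation are illustrative only and do not appear in the parser code.

# Minimal sketch of the list-based Covington transition system (Nivre, 2008)
# used by the parser above.  A configuration is (l1, b, arcs): l1 is the word
# id in focus on the left, b is the front of the buffer, and arcs is a set of
# (head, dependent) pairs.  Word id 0 is the dummy root.

LEFT_ARC, RIGHT_ARC, SHIFT, NO_ARC = 0, 1, 2, 3

def apply_transition(t, l1, b, arcs):
    """Return the successor configuration after applying transition t."""
    arcs = set(arcs)
    if t == LEFT_ARC:      # add arc b -> l1 and move the focus one word left
        arcs.add((b, l1))
        l1 -= 1
    elif t == RIGHT_ARC:   # add arc l1 -> b and move the focus one word left
        arcs.add((l1, b))
        l1 -= 1
    elif t == NO_ARC:      # move the focus left without adding an arc
        l1 -= 1
    elif t == SHIFT:       # advance the buffer; b becomes the new left focus
        l1, b = b, b + 1
    return l1, b, arcs

# Toy derivation for a 3-word sentence whose gold tree is 0->2, 2->1, 2->3.
l1, b, arcs = 0, 1, set()
for t in (SHIFT, LEFT_ARC, RIGHT_ARC, SHIFT, RIGHT_ARC, SHIFT):
    l1, b, arcs = apply_transition(t, l1, b, arcs)
print(sorted(arcs))   # [(0, 2), (2, 1), (2, 3)]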
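The _loss() function above follows Algorithm 1 of Gomez-Rodriguez and Fernandez-Gonzalez (2015): the loss of a configuration is the number of pending gold arcs that have become unreachable, plus the number of cycles that the remaining gold arcs would form together with the arcs already built. Below is a standalone sketch of that bound which recomputes everything from scratch, so it stays O(n^2) like the implementation in this patch rather than the incremental O(n) oracle of the paper; the helper names (oracle_loss, count_cycles, _is_descendant) and the exact reachability test are this sketch's own reconstruction, not the patch's code.

# Sketch of the dynamic-oracle loss bound for non-projective Covington parsing.
# Arcs are (head, dependent) pairs; the configuration is (l1, b, built).

def _head_of(arcs):
    return {d: h for (h, d) in arcs}           # each dependent has at most one head

def _is_descendant(arcs, node, ancestor):
    """True if `ancestor` is reached by following head links upward from `node`."""
    head, seen = _head_of(arcs), set()
    while node in head and node not in seen:
        seen.add(node)
        node = head[node]
        if node == ancestor:
            return True
    return False

def count_cycles(arcs):
    """Count cycles in a set of arcs in which every dependent has one head."""
    head, done, cycles = _head_of(arcs), set(), 0
    for start in head:
        if start in done:
            continue
        path, node = [], start
        while node in head and node not in done and node not in path:
            path.append(node)
            node = head[node]
        if node in path:                        # the walk re-entered the current path
            cycles += 1
        done.update(path)
    return cycles

def oracle_loss(l1, b, built, gold):
    """Lower bound on the gold arcs lost from configuration (l1, b, built)."""
    unreachable = set()
    for (x, y) in gold - built:
        left, right = min(x, y), max(x, y)
        passed = b > right or (b == right and l1 < left)   # focus pair already passed
        other_head = any(d == y and h != x for (h, d) in built)
        would_cycle = _is_descendant(built, x, y)          # adding x -> y would close a cycle
        if passed or other_head or would_cycle:
            unreachable.add((x, y))
    reachable = (gold - built) - unreachable
    return len(unreachable) + count_cycles(built | reachable)

# Example: gold tree 0->2, 2->1, 2->3.  From the configuration reached by SHIFT
# followed by a (wrong) NO_ARC, the arc 2->1 can no longer be built, so loss = 1.
gold = {(0, 2), (2, 1), (2, 3)}
print(oracle_loss(0, 2, set(), gold))   # 1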
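The lookup_conll_extra_data() and dump_lookup_extra_into_conll() helpers above keep the parser input well-formed by setting aside CoNLL-U lines whose ID column is a multiword range (e.g. 3-4) or an empty node (e.g. 5.1) and re-inserting them when the parsed file is written out. A small sketch of the splitting side of that bookkeeping, assuming at most one such extra line per insertion point, is given below; split_extra_lines is an illustrative name, not a function from the patch.

# Sketch: separate multiword-range / empty-node lines from parsable token lines,
# remembering where each set-aside line should be re-inserted on output.

def split_extra_lines(conllu_lines):
    """Return (parsable_lines, extra) where extra maps (sent_id, word_id) -> line."""
    extra, parsable = {}, []
    sent_id, next_word = 0, 1
    for line in conllu_lines:
        if line.startswith('#'):
            parsable.append(line)
            continue
        if not line.strip():                    # sentence boundary
            parsable.append(line)
            sent_id, next_word = sent_id + 1, 1
            continue
        token_id = line.split('\t')[0]
        if '-' in token_id or '.' in token_id:  # range or empty node: set it aside
            extra[(sent_id, next_word)] = line
        else:
            parsable.append(line)
            next_word += 1
    return parsable, extra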