Skip to content

Commit 2dfb915

Browse files
committed
Fix Czech conjunction/particle "ani" (PART → ADV, SCONJ → CCONJ) and relative pronoun "an" (DET → PRON).
1 parent 61c757a commit 2dfb915

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

udapi/block/ud/cs/fixmorpho.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -162,6 +162,10 @@ def process_node(self, node):
162162
node.lemma = 'jenžto'
163163
node.upos = 'PRON'
164164
node.feats['PrepCase'] = 'Npr'
165+
# Relative pronoun "an" is PRON (not DET).
166+
elif node.lemma == 'an':
167+
node.upos = 'PRON'
168+
node.feats['PronType'] = 'Rel'
165169
# Pronoun "kdo" is PRON (not DET).
166170
elif node.lemma == 'kdo':
167171
node.lemma = 'kdo'
@@ -397,10 +401,11 @@ def process_node(self, node):
397401
#----------------------------------------------------------------------
398402
# Words that indicate the speaker's attitude are tagged ADV in UD,
399403
# although the Czech tagsets often treat them as particles.
400-
if node.upos == 'PART' and re.fullmatch(r'(asi?|až|bezpochyby|bohdá|co|dokonce|jen|jistě|již|hlavně|hned|jednoduše|leda|možná|naopak|nejen|nejspíše?|opravdu|ovšem|patrně|právě|prej|prý|přece|především|rozhodně|skoro|skutečně|snad|spíše?|teda|tedy|třeba|určitě|věru|vlastně|vůbec|zajisté|zase|zrovna|zřejmě|zvlášť|zvláště)', node.lemma):
404+
if node.upos == 'PART' and re.fullmatch(r'(ani|asi?|až|bezpochyby|bohdá|co|dokonce|jen|jistě|již|hlavně|hned|jednoduše|leda|možná|naopak|nejen|nejspíše?|opravdu|ovšem|patrně|právě|prej|prý|přece|především|rozhodně|skoro|skutečně|snad|spíše?|teda|tedy|třeba|určitě|věru|vlastně|vůbec|zajisté|zase|zrovna|zřejmě|zvlášť|zvláště)', node.lemma):
401405
node.upos = 'ADV'
402406
node.feats['Degree'] = 'Pos'
403407
node.feats['Polarity'] = 'Pos'
408+
node.misc['CzechParticle'] = 'Yes'
404409
# Adverb "brzo" should be lemmatized as "brzy".
405410
if node.upos == 'ADV' and node.form.lower() == 'brzo':
406411
node.lemma = 'brzy'
@@ -424,6 +429,13 @@ def process_node(self, node):
424429
node.lemma = 'u'
425430
node.feats['AdpType'] = 'Prep'
426431
#----------------------------------------------------------------------
432+
# CONJUNCTION
433+
#----------------------------------------------------------------------
434+
# As a conjunction (and not particle/adverb), "ani" is coordinating and
435+
# not subordinating.
436+
if node.upos == 'SCONJ' and node.lemma == 'ani':
437+
node.upos = 'CCONJ'
438+
#----------------------------------------------------------------------
427439
# PARTICLES THAT SHOULD BE ADVERBS
428440
#----------------------------------------------------------------------
429441
# "jako" should be SCONJ but 19th century data have it as PART.

0 commit comments

Comments
 (0)