1+ from pyspark .sql .types import StructType
2+ from pyspark .sql .types import StructField
3+ from pyspark .sql .types import StringType
4+ from pyspark .sql .types import MapType
5+ from pyspark .sql .types import LongType
6+ from pyspark .sql .types import ArrayType
7+
def AQSchema():
    """Build the Spark schema for an AQAnnotation record.

    Fields, in order:
        docId       -- Document Id (such as PII).
        annotSet    -- Annotation set (such as scnlp, ge).
        annotType   -- Annotation type (such as text, sentence).
        startOffset -- Starting offset for the annotation (based on the
                       text file for the document).
        endOffset   -- Ending offset for the annotation (based on the
                       text file for the document).
        annotId     -- Annotation Id (after the annotations have been
                       reordered).
        properties  -- Any attributes such as exclude annotations,
                       original annotation id, parent id, etc.
                       Stored as a string-to-string map.

    Every field is non-nullable except ``properties``.

    Returns:
        A StructType describing the seven columns above.
    """
    # (name, data type) pairs for the columns that must always be present.
    required_columns = [
        ('docId', StringType()),
        ('annotSet', StringType()),
        ('annotType', StringType()),
        ('startOffset', LongType()),
        ('endOffset', LongType()),
        ('annotId', LongType()),
    ]
    fields = [
        StructField(column_name, column_type, False)
        for column_name, column_type in required_columns
    ]
    # The attribute map is the only column allowed to be null.
    fields.append(
        StructField('properties', MapType(StringType(), StringType()), True)
    )
    return StructType(fields)
25+
def AQSchemaList():
    """Build the schema used for the Preceding and Following functions.

    Fields, in order:
        annot  -- a single AQSchema struct; non-nullable.
        annots -- an array of AQSchema structs; the array itself is
                  nullable and its elements may be null.

    Returns:
        A StructType with the two fields above.
    """
    single_annotation = StructField('annot', AQSchema(), False)
    # Second ArrayType argument is containsNull: elements may be null.
    annotation_array = ArrayType(AQSchema(), True)
    related_annotations = StructField('annots', annotation_array, True)
    return StructType([single_annotation, related_annotations])
31+
def CATSchema():
    """Build the Spark schema for a CATAnnotation record.

    Fields, in order:
        docId       -- Document Id (such as PII).
        annotSet    -- Annotation set (such as scnlp, ge).
        annotType   -- Annotation type (such as text, sentence).
        startOffset -- Starting offset for the annotation (based on the
                       text file for the document).
        endOffset   -- Ending offset for the annotation (based on the
                       text file for the document).
        annotId     -- Annotation Id (after the annotations have been
                       reordered).
        other       -- Any attributes such as exclude annotations,
                       original annotation id, parent id, etc.
                       Stored as a name-value, '&'-delimited string.

    Every field is non-nullable except ``other``.

    Returns:
        A StructType describing the seven columns above.
    """
    # Each entry is (name, data type, nullable), in column order.
    column_specs = (
        ('docId', StringType(), False),
        ('annotSet', StringType(), False),
        ('annotType', StringType(), False),
        ('startOffset', LongType(), False),
        ('endOffset', LongType(), False),
        ('annotId', LongType(), False),
        ('other', StringType(), True),
    )
    return StructType([StructField(*spec) for spec in column_specs])
0 commit comments