-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRegex_Module.py
More file actions
executable file
·144 lines (90 loc) · 3.08 KB
/
Regex_Module.py
File metadata and controls
executable file
·144 lines (90 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import re
# .finditer() returns an iterator of match objects (each carries the span and the matched text)
# .findall() just returns the matches as a list of strings
# Sample text that the patterns in this script are run against.
# It is a raw string (r'''...''') so the backslashes in the cheat-sheet part
# stay literal: in a normal string '\b' silently turns into a backspace
# character, "\'" loses its backslash, and '\d', '\w', '\s', ... are invalid
# escape sequences (a SyntaxWarning on Python 3.12+).
text_to_search = r''' abcdefghijklmnopqrstuvxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ
1234567890
Ha Yeah YeahYeah
#if Word has the space then it is a word boundary string
MetaCharcters (Need to be escaped):
. ^ $ * + { } [ ] \ | ( )
legion.com
123-444-5678
321.555.0987
800-123-1234
900-123-5678
321*123*1235
Mr. Jps
Ms Dubi
Mrs. Xinp
Mr Thei
###################################################################
# Character matches in compile regex
. - Any Character except New line ()'\') (. - period sign )
\d -Digit (0-9)
\D - Not a digit (0-9)
\w Word Charcter (a-z, A-Z, 0-9, _)
\W - Not a word character
\s - White space (space, tab ,newline)
\S - Not white space(space, tab ,newline)
(Note: \. called the literal )
# Anchers
\b - Word boundary
\B - Not a word boundary
^ - Beginnig of a string
$ - Enof string
[] - Matches Characters in brackets (name: Character Set)
[^ ] - Matches characters Not in
| -Either or
( ) -Group
# Quantifiers:
* - match 0 or more
+ - 1 0r more
? - 0 or One
{3} - Exact Numbers in count
{3,4} - Range of Numbers(Min, Max)
#################################################################################
Pat
Sat
bat
Mat
'''
# Raw-string demo: without the r prefix '\t' is a tab character; with the
# r prefix the backslash and the 't' are kept as two literal characters.
print('\tTab', r'\tTab', sep='\n')

# Short sample sentence, referenced by the match()/search() example later on.
sentence = 'Start a debat and bring it on the conclusion'
# compile() method: re.compile() turns a pattern string into a reusable
# Pattern object.  Every assignment below rebinds `pattern`, so only the last
# active one is used by the findall()/finditer() calls at the end of this
# section; the earlier ones are kept as a step-by-step walkthrough.
pattern = re.compile(r'abc')  # plain literal text
pattern = re.compile(r'\d\d\d.\d\d\d.\d\d\d\d')  # phone number; the unescaped '.' accepts ANY separator character
# pattern = re.compile(r'\d\d\d[-.]\d\d\d[-.]\d\d\d\d')  # separator restricted to '-' or '.'
pattern = re.compile(r'[89]00[-.]\d\d\d[-.]\d\d\d\d')  # only numbers starting with 800 or 900
# Inside a character set, a '-' placed first or last is a literal hyphen;
# between two characters it denotes a range.
pattern = re.compile(r'[a-zA-Z]')  # any ASCII letter (use just [a-z] for lower case only)
pattern = re.compile(r'[^b]at')  # any character except 'b', followed by 'at'
pattern = re.compile(r'\d{3}.\d{3}.\d{4}')  # {n} quantifier: exact repeat counts
pattern = re.compile(r'Mr\.')  # escaped '.' matches a literal period
# NOTE: the class must be [A-Z]; [A-z] would also accept the punctuation that
# sits between 'Z' and 'a' in ASCII ('[', '\', ']', '^', '_', '`').
pattern = re.compile(r'Mr\.?\s[A-Z]')  # '?' makes the period optional (0 or 1)
pattern = re.compile(r'Mr\.?\s[A-Z]\w*')  # followed by zero or more word characters
pattern = re.compile(r'(Mr|Mrs|Ms)\.?\s[A-Z]\w*')  # group with alternation for the title
pattern = re.compile(r'\d{3}.\d{3}.\d{4}')  # pattern actually used for the searches below
# pattern = re.compile(r'\.')  # literal search
# pattern = re.compile(r'legion\.com')
matches = pattern.findall(text_to_search)  # list of matched strings (immediately replaced below)
matches = pattern.finditer(text_to_search)  # one-shot iterator of match objects
# Disabled alternative, kept for reference: match() vs search() on `sentence`.
# pattern = re.compile(r'Start')
# matches = pattern.match(sentence)   # matches only at the very beginning of the string
# matches = pattern.search(sentence)  # returns the FIRST match anywhere in the string (or None), not all of them
# print(sentence)

# Print each item yielded by `matches` (bound earlier in the script).
for match2 in matches:
    print(match2)
#print(text_to_search[1:4])
# Parsing data from a file: read text2.txt (path relative to the current
# working directory) in full, then run the current `pattern` over its
# contents and print every match object found.
with open('text2.txt', 'r') as f:
    contents = f.read()

# The whole file is already in memory, so the search runs after the file
# has been closed.
matches1 = pattern.finditer(contents)
for match1 in matches1:
    print(match1)