-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
122 lines (102 loc) · 3.53 KB
/
server.py
File metadata and controls
122 lines (102 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from flask import Flask, render_template, request
import os
from datetime import datetime
from timeit import default_timer as timer
from urllib.parse import urlparse
from ast import literal_eval
from elasticsearch import Elasticsearch
# Named like a cap on the number of results, but it is not referenced
# anywhere in the visible code.
# NOTE(review): presumably intended as the Elasticsearch `size` limit — confirm.
MAX_RESULTS = 1000
# Flask application object; the route decorators in this file register on it.
app = Flask(__name__)
class SingleResult:
    """A single search hit: title, URL, timestamp (`ts`) and keywords."""

    def __init__(self, title, url, ts, keywords):
        # Plain value holder: the arguments are stored as given, with no
        # validation or conversion.
        (self._title, self._url, self._ts, self._keywords) = (
            title,
            url,
            ts,
            keywords,
        )

    def __str__(self):
        # Only the title and the URL appear in the textual form.
        shown_title, shown_url = self._title, self._url
        return f"SingleResult title='{shown_title}' url='{shown_url}'"
class SearchResult:
    """Outcome of one search.

    Holds the list of single results, the time in seconds the search
    took, and the keyword that was searched for.
    """

    def __init__(self, results, search_time, keyword):
        # Stored unchanged; the three assignments are independent of each other.
        self._keyword = keyword
        self._search_time = search_time
        self._results = results
class Server:
    """Wrapper around an Elasticsearch client bound to a single index."""

    def __init__(self, hostname: str, port: int, user: str=None, password: str=None, index_name: str="crawler_main"):
        """Connect to Elasticsearch and check that the index exists.

        Args:
            hostname: Host name of the Elasticsearch node.
            port: HTTP port of the Elasticsearch node.
            user: Optional user name; must be given together with `password`.
            password: Optional password; must be given together with `user`.
            index_name: Name of the index that `search` queries.

        Raises:
            RuntimeError: If only one of `user` / `password` is given, or
                if the index does not exist.
        """
        self._index_name = index_name
        if not user and not password:
            # No credentials at all: connect without authentication.
            url = f"http://{hostname}:{port}"
        elif user and password:
            url = f"http://{user}:{password}@{hostname}:{port}"
        else:
            # Exactly one of the two was supplied. Reject the half-specified
            # pair instead of silently connecting without authentication.
            raise RuntimeError("Please specify user and password for elasticsearch connection")
        self._es = Elasticsearch([url])
        if not self._es.indices.exists(index=self._index_name):
            raise RuntimeError(f"Index {index_name} does not exist")
        print(f"Elastisearch connection (index {index_name}) ok")

    @staticmethod
    def _build_query(keywords: list, language: str) -> dict:
        """Build the Elasticsearch request body for the given words and language."""
        # Every word is searched both verbatim and wrapped in wildcards.
        terms = []
        for word in keywords:
            terms.append(word)
            terms.append('*' + word + '*')
        return {
            "query": {
                "bool": {
                    "must": [
                        {
                            "query_string": {
                                "query": ' '.join(terms),
                                # The `^N` suffixes boost matches in the URL
                                # and the title over plain keyword matches.
                                "fields": [
                                    "url^4",
                                    "keywords",
                                    "title^2"
                                ]
                            }
                        },
                        {
                            # Wildcard query on the language field, so a
                            # pattern such as "*" is accepted here.
                            "wildcard": {
                                "language": language
                            }
                        }
                    ]
                }
            },
            # Order hits by the length of their URL, shortest first.
            "sort": {
                "_script": {
                    "type": "number",
                    "script": "doc['url.keyword'].value.length()",
                    "order": "asc"
                }
            }
        }

    def search(self, _keywords: list, language: str = "en"):
        """Search the index for the given words.

        Args:
            _keywords: List of search words (strings).
            language: Pattern matched against the `language` field.

        Returns:
            A `SearchResult` holding one `SingleResult` per returned hit,
            the reported search time converted to seconds, and the search
            words joined by spaces.
        """
        query = self._build_query(_keywords, language)
        res = self._es.search(index=self._index_name, body=query)
        # `took` is divided by 1000 below so that the result carries seconds.
        search_time = int(res['took'])
        result = []
        for hit in res['hits']['hits']:
            source = hit['_source']
            url = source['url']
            title = source['title']
            ts = source['timestamp']
            kw = source['keywords']
            result.append(SingleResult(title, url, ts, kw))
        return SearchResult(result, search_time/1000.0, ' '.join(_keywords))
@app.route('/')
def render():
    """Render `index.html` without passing any template variables."""
    page = render_template('index.html')
    return page
@app.route('/', methods=['POST','GET'])
def search():
    """Run a search from the submitted form and render the results.

    Reads the `text` and `language` form fields, maps the language value
    "none" to the wildcard pattern "*", and renders `index.html` with the
    search result and its hit count. Any request that is not a POST gets
    the plain page without result data.
    """
    if request.method != 'POST':
        # Without this guard `result` would stay None and
        # `len(result._results)` below would raise AttributeError.
        return render_template('index.html')
    keyword = request.form['text']
    language = request.form['language']
    if language == "none":
        # "none" means no language restriction, so match every language.
        language = "*"
    result = server.search(keyword.split(), language)
    return render_template('index.html', data=result, n_results=len(result._results))
# main application starts here
# `server` stays at module level because the `search` view looks it up as a
# global.
server = Server("localhost", 9200)
if __name__ == "__main__":
    # Start Flask's built-in server only when this file is executed as a
    # script, so that importing the module does not block on `app.run()`.
    app.run()