-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathProgram.cs
More file actions
84 lines (76 loc) · 2.43 KB
/
Program.cs
File metadata and controls
84 lines (76 loc) · 2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HAP = HtmlAgilityPack;
/**
 * Warning: although this program uses namespaces, classes, methods, etc.,
 * it is not actually designed in an object-oriented way; it is procedural.
 * All of its methods are static.
 */
namespace WebCrawler
{
/// <summary>
/// Console entry point of the crawler. Everything here is static and runs
/// procedurally from <see cref="Main"/>.
/// </summary>
class Program
{
    /// <summary>
    /// One entry of the crawler list: a start URL paired with the folder name
    /// that is passed to the crawler alongside it.
    /// </summary>
    struct queryElmt
    {
        public string url;
        public string folderName;
    }

    /// <summary>Entries read by <see cref="loadQueryURL"/>.</summary>
    static List<queryElmt> queryURLs;

    /// <summary>
    /// When true, <see cref="Main"/> crawls every entry of <see cref="queryURLs"/>
    /// before querying the explorer; when false the crawl loop is skipped.
    /// Defaults to false because the original code unconditionally broke out of
    /// the crawl loop, i.e. crawling was effectively disabled. Flip to true to
    /// re-enable it.
    /// </summary>
    static bool forceCrawl = false;

    /// <summary>
    /// Rebuilds <see cref="queryURLs"/> from the file located at
    /// <c>Configuration.getCrawlerListLocation()</c>. Words are consumed in
    /// pairs: the first becomes the URL, the second the folder name.
    /// </summary>
    static void loadQueryURL()
    {
        queryURLs = new List<queryElmt>();
        WordProcessor wp = new WordProcessor(Configuration.getCrawlerListLocation());
        try
        {
            while (!wp.isEOF())
            {
                queryElmt tmp;
                tmp.url = wp.getWord();
                wp.advanceWord();
                // NOTE(review): if the list holds an odd number of words, this reads
                // whatever getWord() yields at end of file - confirm WordProcessor's
                // behaviour in that case.
                tmp.folderName = wp.getWord();
                wp.advanceWord();
                queryURLs.Add(tmp);
            }
        }
        finally
        {
            // Close the reader even when reading fails part-way through.
            wp.closeFile();
        }
    }

    /// <summary>
    /// Program entry point: applies the default configuration, initialises the
    /// crawler and index tables, loads the crawler list, optionally crawls it
    /// (see <see cref="forceCrawl"/>), prints the URLs returned by a fixed
    /// explorer query, and then blocks forever.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Configuration.setDefaultConfiguration();
        Crawler.initIgnoredExtension();
        Index.initIndexTagList();
        //Configuration.setTraversalMode("DFS");
        Configuration.setMaximumDepth(2);
        loadQueryURL();

        if (forceCrawl)
        {
            foreach (var queryURL in queryURLs)
            {
                Crawler.doCrawler(queryURL.url, Configuration.getTraversalMode(), queryURL.folderName, Configuration.getMaximumDepth());
                Crawler.saveToIndex();
                Crawler.saveResultToFile();
            }
        }
        Console.WriteLine("crawler complete");

        List<Crawler.crawlElement> explorerResults = Explorer.explore("hello", "kuliah", 10);
        foreach (var explorerResult in explorerResults)
        {
            Console.WriteLine(explorerResult.url);
        }

        // Keep the process (and its console output) alive without spinning a
        // CPU core; like the original empty infinite loop, this never returns.
        System.Threading.Thread.Sleep(System.Threading.Timeout.Infinite);
    }

    /// <summary>
    /// Probes whether the resource at <paramref name="s"/> can be opened for reading.
    /// </summary>
    /// <param name="s">Address handed to <c>WebClient.OpenRead</c>.</param>
    /// <returns>
    /// true if the read stream was opened; false if any exception was thrown
    /// while creating the client or opening the stream.
    /// </returns>
    public static bool testConnection(string s)
    {
        try
        {
            using (var client = new System.Net.WebClient())
            using (var stream = client.OpenRead(s))
            {
                return true;
            }
        }
        catch
        {
            // Deliberate best-effort probe: every failure means "not reachable".
            return false;
        }
    }
}
}