Skip to content

Commit f9c3b2c

Browse files
committed
feat: update articles or snippets
1 parent f6ca87b commit f9c3b2c

File tree

2 files changed

+107
-89
lines changed

2 files changed

+107
-89
lines changed

dd-search-engine/sg-index-doc/src/cli/build-meili-index.ts

Lines changed: 23 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -21,28 +21,34 @@ program
2121
"commit-all.sh",
2222
"Private",
2323
"Playground",
24+
"Awesome-Lists",
25+
"Awesome-CS-Books-and-Digests",
2426
];
2527

2628
firstDirs
2729
.filter((d) => !ignores.includes(d))
2830
.forEach(async (dir) => {
29-
const dirPath = path.join(baseLocalDir, dir);
30-
31-
const repos = fs
32-
.readdirSync(dirPath)
33-
.filter((d) => !ignores.includes(d))
34-
.map(
35-
(repoDirName) =>
36-
({
37-
name: repoDirName,
38-
title: repoDirName,
39-
localPath: path.join(dirPath, repoDirName),
40-
description: repoDirName,
41-
sUrl: `https://ng-tech.icu/${repoDirName}`,
42-
} as ReposityConfig)
43-
);
44-
45-
await buildDocIndex(repos);
31+
try {
32+
const dirPath = path.join(baseLocalDir, dir);
33+
34+
const repos = fs
35+
.readdirSync(dirPath)
36+
.filter((d) => !ignores.includes(d))
37+
.map(
38+
(repoDirName) =>
39+
({
40+
name: repoDirName,
41+
title: repoDirName,
42+
localPath: path.join(dirPath, repoDirName),
43+
description: repoDirName,
44+
sUrl: `https://ng-tech.icu/${repoDirName}`,
45+
} as ReposityConfig)
46+
);
47+
48+
await buildDocIndex(repos);
49+
} catch (e) {
50+
console.error(e);
51+
}
4652
});
4753
});
4854

dd-search-engine/sg-index-doc/src/service/index/buildDocIndexWithMeili.ts

Lines changed: 84 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -27,28 +27,77 @@ export async function buildDocIndex(repos: ReposityConfig[]) {
2727
attributesForFaceting: ["categories", "repo"],
2828
});
2929

30-
repos.forEach((repo) => {
31-
// 获取仓库的配置信息
32-
33-
const files = walkSync(repo.localPath).filter(
34-
(path) =>
35-
(path.endsWith(".md") || path.endsWith(".pdf")) &&
36-
path !== "README.md" &&
37-
path.indexOf("Weekly") === -1 &&
38-
path.indexOf("examples") === -1
39-
);
40-
41-
files.forEach(async (file) => {
42-
// 封装出 href
43-
const href = `${repo.sUrl}/blob/master/${file}`;
44-
const absoluteFile = `${repo.localPath}/${file}`;
45-
let fileName: string = file.split("/").reverse()[0];
46-
47-
// 判断是否为 PDF
48-
if (file.endsWith("pdf")) {
49-
index.addDocuments([
50-
{
51-
id: md5(href),
30+
for (const repo of repos) {
31+
try {
32+
console.log(`>>>${repo.name}>>>start`);
33+
34+
// 获取仓库的配置信息
35+
const files = walkSync(repo.localPath).filter(
36+
(path) =>
37+
(path.endsWith(".md") || path.endsWith(".pdf")) &&
38+
path !== "README.md" &&
39+
path.indexOf("Weekly") === -1 &&
40+
path.indexOf("examples") === -1
41+
);
42+
43+
console.log(`>>>${repo.name}>>>${files.length}`);
44+
45+
for (const file of files) {
46+
try {
47+
// 封装出 href
48+
const href = `${repo.sUrl}/blob/master/${file}`;
49+
const absoluteFile = `${repo.localPath}/${file}`;
50+
let fileName: string = file.split("/").reverse()[0];
51+
52+
// 判断是否为 PDF
53+
if (file.endsWith("pdf")) {
54+
index.addDocuments([
55+
{
56+
id: md5(href),
57+
fileName,
58+
repo: repo.name,
59+
categories: file
60+
.split("/")
61+
.filter(
62+
(c) =>
63+
Number.isNaN(parseInt(c, 10)) && c.indexOf(".md") === -1
64+
),
65+
href,
66+
desc: fileName,
67+
content: fileName,
68+
},
69+
]);
70+
71+
continue;
72+
}
73+
74+
// 读取文件内容
75+
const content = await readFileAsync(absoluteFile, {
76+
encoding: "utf-8",
77+
});
78+
const desc = (await readMarkdownHeadersFromFile(absoluteFile)).join(
79+
", "
80+
);
81+
82+
if (!content) {
83+
continue;
84+
}
85+
86+
// 这里对文件内容进行预处理
87+
const filteredContent = (content as string)
88+
.replace(/\n/g, "")
89+
.replace(/\[(.*)\]\(http.*\)/g, (_, __) => __)
90+
.replace(/```\w*.*```/g, "");
91+
92+
const contents = [filteredContent];
93+
94+
if (!contents) {
95+
continue;
96+
}
97+
98+
// 分割过长内容
99+
const objs = contents.map((content, index) => ({
100+
id: index ? md5(`${href}${index}`) : md5(href),
52101
fileName,
53102
repo: repo.name,
54103
categories: file
@@ -57,56 +106,19 @@ export async function buildDocIndex(repos: ReposityConfig[]) {
57106
(c) => Number.isNaN(parseInt(c, 10)) && c.indexOf(".md") === -1
58107
),
59108
href,
60-
desc: fileName,
61-
content: fileName,
62-
},
63-
]);
64-
65-
return;
66-
}
67-
68-
// 读取文件内容
69-
const content = await readFileAsync(absoluteFile, { encoding: "utf-8" });
70-
const desc = (await readMarkdownHeadersFromFile(absoluteFile)).join(", ");
71-
72-
if (!content) {
73-
return;
109+
desc,
110+
content,
111+
}));
112+
113+
index.addDocuments(objs);
114+
} catch (e) {
115+
console.error(e);
116+
}
74117
}
75118

76-
// 这里对文件内容进行预处理
77-
const filteredContent = (content as string)
78-
.replace(/\n/g, "")
79-
.replace(/\[(.*)\]\(http.*\)/g, (_, __) => __)
80-
.replace(/```\w*.*```/g, "");
81-
82-
const contents = [filteredContent];
83-
84-
if (!contents) {
85-
return;
86-
}
87-
88-
// 分割过长内容
89-
const objs = contents.map((content, index) => ({
90-
id: index ? md5(`${href}${index}`) : md5(href),
91-
fileName,
92-
repo: repo.name,
93-
categories: file
94-
.split("/")
95-
.filter(
96-
(c) => Number.isNaN(parseInt(c, 10)) && c.indexOf(".md") === -1
97-
),
98-
href,
99-
desc,
100-
content,
101-
}));
102-
103-
try {
104-
index.addDocuments(objs);
105-
} catch (e) {
106-
console.error(e);
107-
}
108-
});
109-
110-
console.log(`${repo.name} indexed finally.`);
111-
});
119+
console.log(`${repo.name} indexed finally.`);
120+
} catch (e) {
121+
console.error(e);
122+
}
123+
}
112124
}

0 commit comments

Comments
 (0)