-
Notifications
You must be signed in to change notification settings - Fork 0
Nico
Notes on data structure with the help of Gemini 2.5 PRO
dt = await (await fetch('https://raw.githubusercontent.com/episphere/gemini/main/doc/Electronic path data exa mple(Sheet1)2_5_pro.json')).json()
Proposed data structure for the 75 reports provided:
{
"report_id": "...",
"clinical_history": "...",
"personnel": [],
"specimens": [
{
"specimen_id": "...",
"source_procedure": "...",
"diagnosis": [],
"gross_description": "...",
"microscopic_description": "...",
"comments": []
}
],
"disclaimer": "...",
"certification_statement": "..."
}{
"Chronic cervicitis": {"enum": ["yes","no"]},
"HPV 18": {"enum": ["yes","no"]},
"Transformation Zone/Endocervical Glands": {"enum": ["Present","Absent"]}
}
Generated schema:
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Cervical Health Data",
"description": "A schema for recording basic cervical health indicators.",
"type": "object",
"properties": {
"Chronic cervicitis": {
"description": "Indicates the presence or absence of chronic cervicitis.",
"type": "string",
"enum": [
"yes",
"no"
]
},
"HPV 18": {
"description": "Indicates the presence or absence of HPV type 18.",
"type": "string",
"enum": [
"yes",
"no"
]
},
"Transformation Zone/Endocervical Glands": {
"description": "Indicates the presence or absence of the transformation zone or endocervical glands.",
"type": "string",
"enum": [
"Present",
"Absent"
]
}
},
"required": [
"Chronic cervicitis",
"HPV 18",
"Transformation Zone/Endocervical Glands"
]
}txt = await(await fetch('https://raw.githubusercontent.com/episphere/gemini/main/doc/Electronic%20path%20data%20example(Sheet1).csv')).text()
// --- Snippet: split the CSV text into rows -------------------------------
// These snippets are written for the browser console, so shared variables
// (rows, dt, diagnostics, res, ...) are assigned without declarations to keep
// them available across snippets.
//
// Rows are split on CRLF only.
// NOTE(review): presumably bare LF characters can occur inside a report's
// text, which is why the separator is not widened to /\r?\n/ — confirm
// against the CSV before changing it.
rows = txt.split(/\r\n/).slice(0, -1); // blank tail removed
// Keep the raw row text together with its id (first run of word characters).
rows = rows.map((row) => ({ txt: row, report_id: row.match(/[\w]+/)[0] }));

// --- Snippet: collect the distinct diagnosis entries ---------------------
dt = await (await fetch('https://raw.githubusercontent.com/episphere/gemini/main/doc/Electronic path data exa mple(Sheet1)2_5_pro.json')).json();
diagnostics = [];
specimens = []; // initialised here but not populated by this snippet
dt.forEach((d) => {
  // An entry without the expected specimens/diagnosis arrays throws; the
  // error is logged and the remaining entries are still processed.
  try {
    d.specimens.forEach((sp) => {
      sp.diagnosis.forEach((dia) => {
        diagnostics.push(dia);
      });
    });
  } catch (err) {
    console.log(err);
  }
});
// De-duplicate through a Set.
diagnostics = [...new Set(diagnostics)];

// --- Snippet: extract the reports one at a time, pausing between calls ---
/**
 * Resolve after the given delay.
 * @param {number} [milliseconds=1000] - delay before the promise resolves
 * @returns {Promise<void>}
 */
const wait = (milliseconds = 1000) => {
  return new Promise((resolve) => setTimeout(resolve, milliseconds));
};
res = [];
for (let i = 0; i < 75; i++) {
  // The pause must be awaited: a bare wait() only creates the timer promise
  // and the loop carries on immediately, so no delay would separate the calls.
  await wait();
  res[i] = await (await import('https://episphere.github.io/gemini/extractNico.mjs')).extractNico(i);
}

// --- Snippet: run the batch helper exported by the module ----------------
res75 = await (await import('https://episphere.github.io/gemini/extractNico.mjs')).extractNico75();

// --- Snippet: extract a single report (index used by the next statement) -
i = 9;
res_i = await (await import('https://episphere.github.io/gemini/extractNico.mjs')).extractNico(i)
When running a batch of information extractions with the in-browser AI via extractNico75, you can keep an eye on the shared session. For example, when running extractNico75,
nicoReps = await (await import('https://episphere.github.io/gemini/extractNico.mjs'));
the session is externally available since it is exported (note export { extractNico, extractNico75, session } at the end of the extractNico.mjs module). As a consequence, the session is available as nicoReps.session:
session = nicoReps.session
Every time you ask for the session you get something like
LanguageModel {inputUsage: 3081, inputQuota: 9216, topK: 3, temperature: 1, onquotaoverflow: null}
The res method was written to record which report (out of 75) was processed. For example, if you want to compare the reports at indices 3 and 8,
res = await nicoReps.extractNico(3);
res = await nicoReps.extractNico(8);
you just have to inspect res:
[
null,
null,
null,
{
"Chronic cervicitis": "yes",
"HPV 18": "no",
"Transformation Zone/Endocervical Glands": "Present"
},
null,
null,
null,
null,
{
"Chronic cervicitis": "no",
"HPV 18": "yes",
"Transformation Zone/Endocervical Glands": "Absent"
}
]
If you only came here for the information extraction results, then there you have it:
// Fetch nicoExtracted.json from the repository and parse it as JSON.
res = await (await fetch('https://raw.githubusercontent.com/episphere/gemini/refs/heads/main/nicoExtracted.json')).json()