From 9beaf4b0cd1ab188c55deec7f136b3f6cb0a7f94 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 16:25:59 -0800 Subject: [PATCH 01/10] init commit with txtai rag for recommend --- solo_server/commands/recommend.py | 140 ++++++++++++++++++ .../solodocs/Bug Report Template.docx | Bin 0 -> 9123 bytes 2 files changed, 140 insertions(+) create mode 100644 solo_server/commands/recommend.py create mode 100644 solo_server/commands/solodocs/Bug Report Template.docx diff --git a/solo_server/commands/recommend.py b/solo_server/commands/recommend.py new file mode 100644 index 0000000..19d4eec --- /dev/null +++ b/solo_server/commands/recommend.py @@ -0,0 +1,140 @@ +from txtai import RAG +from litgpt import LLM +from rich.console import Console +import os + +import nltk +nltk.download(['punkt', 'punkt_tab']) + +from txtai.pipeline import Textractor + +from txtai import Embeddings + + +# Create Textractor +textractor = Textractor() +text = textractor("solodocs/solo-server/solo_server/commands/solodocs/Bug Report Template.docx") +print(text) + + +def stream(path): + for f in sorted(os.listdir(path)): + fpath = os.path.join(path, f) + + # Only accept documents + if f.endswith(("docx", "xlsx", "pdf")): + print(f"Indexing {fpath}") + for paragraph in textractor(fpath): + yield paragraph + +# Document text extraction, split into paragraphs + +# Vector Database +embeddings = Embeddings(content=True) +embeddings.index(stream("solodocs")) + +console = Console() + +def recommend_based_on_docs(query: str): + """ + Generate a recommendation answer (with citations) based on the documentation context using a RAG pipeline. + + The function uses the following documentation context: + + "As we discussed before, this is important when dealing with large volumes of data. Not all of the data can be added to a LLM prompt. Additionally, having only the most relevant context helps the LLM generate higher quality answers. + + Citations for LLMs + A healthy level of skepticism should be applied to answers generated by AI. We're far from the day where we can blindly trust answers from an AI model. + + txtai has a couple approaches for generating citations. The basic approach is to take the answer and search the vector database for the closest match. + + for x in embeddings.search(result): + print(x['text']) + + E5-base-v2 + Image Captions BLIP + Labels - Zero Shot BART-Large-MNLI + Model Guide + |Component |Model(s)|Date Added| + |---|---|---| + |Embeddings |all-MiniLM-L6-v2|2022-04-15| + |Image Captions |BLIP|2022-03-17| + |Labels - Zero Shot |BART-Large-MNLI|2022-01-01| + |Large Language Model (LLM) |Mistral 7B OpenOrca|2023-10-01| + |Summarization |DistilBART|2021-02-22| + |Text-to-Speech |ESPnet JETS|2022-08-01| + |Transcription |Whisper|2022-08-01| + |Translation |OPUS Model Series|2021-04-06| + + While the basic approach above works in this case, txtai has a more robust pipeline to handle citations and references. + The RAG pipeline is defined below. A RAG pipeline works in the same way as a LLM + Vector Search pipeline, except it has special logic for generating citations. + This pipeline takes the answers and compares it to the context passed to the LLM to determine the most likely reference." + + The function prints the generated answer and the corresponding citation. + """ + # Documentation context to guide the answer + docs_context = ( + "As we discussed before, this is important when dealing with large volumes of data. Not all of the data can be added to a LLM prompt. 
" + "Additionally, having only the most relevant context helps the LLM generate higher quality answers.\n\n" + "Citations for LLMs:\n" + "A healthy level of skepticism should be applied to answers generated by AI. We're far from the day where we can blindly trust answers from an AI model.\n\n" + "txtai has a couple approaches for generating citations. The basic approach is to take the answer and search the vector database for the closest match.\n\n" + "for x in embeddings.search(result):\n print(x['text'])\n\n" + "E5-base-v2\n" + "Image Captions BLIP\n" + "Labels - Zero Shot BART-Large-MNLI\n\n" + "Model Guide\n" + "|Component |Model(s)|Date Added|\n" + "|---|---|---|\n" + "|Embeddings |all-MiniLM-L6-v2|2022-04-15|\n" + "|Image Captions |BLIP|2022-03-17|\n" + "|Labels - Zero Shot |BART-Large-MNLI|2022-01-01|\n" + "|Large Language Model (LLM) |Mistral 7B OpenOrca|2023-10-01|\n" + "|Summarization |DistilBART|2021-02-22|\n" + "|Text-to-Speech |ESPnet JETS|2022-08-01|\n" + "|Transcription |Whisper|2022-08-01|\n" + "|Translation |OPUS Model Series|2021-04-06|\n\n" + "While the basic approach above works in this case, txtai has a more robust pipeline to handle citations and references.\n\n" + "The RAG pipeline is defined below. A RAG pipeline works in the same way as a LLM + Vector Search pipeline, except it has special logic for generating citations. " + "This pipeline takes the answers and compares it to the context passed to the LLM to determine the most likely reference." + ) + + # Create a prompt that injects the documentation context + def prompt_with_context(question: str): + return [{ + "query": question, + "question": f""" +Answer the following question using only the context below. Only include information specifically discussed. + +question: {question} +context: +{docs_context} +""" + }] + + # Create the LLM instance with a system prompt template. + llm = LLM("TheBloke/Mistral-7B-OpenOrca-AWQ") + + # Create the RAG instance using txtai; the output mode "reference" will provide a reference id. + rag = RAG(embeddings, llm, output="reference") + + # Query the RAG pipeline with the prompt that includes the docs context. + result = rag(prompt_with_context(query), maxlength=4096, pad_token_id=32000)[0] + + console.print("ANSWER:", style="bold cyan") + console.print(result["answer"], style="white") + + # Retrieve and print citation text using the reference from the result. + citation = embeddings.search( + "select id, text from txtai where id = :id", + limit=1, + parameters={"id": result["reference"]} + ) + console.print("CITATION:", style="bold cyan") + console.print(citation, style="white") + +# Example usage: +if __name__ == "__main__": + # Test the function with a sample recommendation query. + test_query = "recommend how can I optimize model performance based on the docs provided?" 
+ recommend_based_on_docs(test_query) diff --git a/solo_server/commands/solodocs/Bug Report Template.docx b/solo_server/commands/solodocs/Bug Report Template.docx new file mode 100644 index 0000000000000000000000000000000000000000..54ba636111ccc27b942c2c28fd63b091892700aa GIT binary patch literal 9123 zcmaJ{1C$-zvOZ&58QZpPW{gZGGa1{;*tTukw#{Uiv2E*3a__r;?*HDs)obl@x>r~4 z-L-4i{;Ix~mj(ev1wcYV0;aqTKLP#_Vw2U(M3-RS{oofn8&K$=U4Y`-8v8;K&zn5s31qaSq;RlF>B++;T&^t zeYKYw$W4N6DhPl|&)+_}a^N5a*%M@r61$4SX(k@+?{&-E7b@oraQF3^qElcYZN6g* zCK9)+=n49#?dEuad) zUG*}}*eXhcC5{_IRc%j47-tX{OPvTyG!fZjw~ShWz}}Z~#RkmOHd5%c#DrYgHL3wa z&XIB(#|6U-lxI~Hi1+bZabr$wXeFfJ4vabMB7~rwfHsIez7LE2=pxKME{ma|206EO zbdndWU_F_G)61fbn?YhE4vm$`;+wIRG4%4%OEtpDCU`{VDTs_EVD<@6s|nZ%^a!J% zpy%{i8BAuwQJD0#1Rz)*N%h$El~6i=GOg%3k$;Ux`66u*oYMD|0E8D#ADOlTNM0HO z($9JlGZzQ|NC5``5woQGX;moYF_38Fr!iH3a?q=hH z6H&XJYao3|uK})b5}D`Qcz`!TI(&gz!s`zY>bX%m`pV2S+6WPBDpB~Dzz;M*0?0}V zyX$v@cRMKRA7H=&WT0bwQ5e@9o_6wwVXNoTnP9{b$+jpU;_InGiF~PgvcE-zKpN5H z?M}6n(-FhpWUES(H2Wl+Qs&;G=G(LZ@rvzc%@G|TNy|kih@sE5>eoQ6gZx^0|3}?2`Bw{xQW|}u3B`p$W zbEs*A6C}w)=#9tdEYsX$FlLPcU;KfggQG$~_!t7C_2Z{dEPVYj|F24PGszy)?E#PW zt}qnH-SrwJeUF>?9k`;k2{8W{svl;#faxlQ~upe5sM=nW=;d4Z+R+x5^9yj78WK(X6%TBiNH=R+F_rt>T2yG1zmCKRi;#P8{n5* zzOxA~JC}1&=3t^hBnDJnyAWc=!gceDKZSqaBX$Eee|vW~2uKm$dL;p106-k_zq#MP zB8;JxrM>c3U2}uq{ueK8x%`g^1Nn0~F8sTM|BzsG)=FI|3q(>>oJbNqEG2b^JEbPV zJUp<5mgMI7o1w%j79UTCS^P1Wi;1k9>S!EE55s6arQPM@-r!~i37^+SRv8i}C*()G zMs2*5vMREBl9BL9mQhJGpHYgNGGwgh8LqFoRaHfVeIIFx!GHd+>@IUe+)hTUO!dYWL| z%Ou?f2+NjU@6>WOqR57D>x(mMew5OFqMLA6uLBx`9{=QxoSf|i7-5M}xG&)zfjZa+ z7q1jy`GgZtPvF|X?yAcs1Sgm{v$8W`5!M{C^d}%4%u`^Ye`w2u% z+X|$t5|FwKxSI!%t?n@zD`(h|bcWYra@3qb%mRR(CmQ{%;+(Y3Iar>7=Vc#+<=ial zX-(4y0$`69qJ|qx{V(>&cfcpfsKS zg#?Qj!wL_JDjG(v7%!fQ8ljhvoabfY&zL_AP^qqTI2K|F<&xRX@+Wae9&YW}koQ_> ztn~tNnS0?pRq2QGxm8Rqg0_Vuk`gFF76?Fu3}vI}fN1eAN!oI#w6%=Vg*5{@$d=d< zrRKiH+PRDah(jzrI@DGA7(C0L&Jxxw!Q)KQf!xyySYzs}f{W}MeCYj=;Q${&zXkff z%J&5My?~pe1I`1nF$%L-RW!5QDV%Jtb1zsa)@C7?L~aEfEw7hFeetKDnR4<>LOeyL zX+GWV!OX7at^n)%UCcD2eWU4v48Anufm0A!>Q1zr&AJ=%m?k zk6>(fCpdelqSIcMCuQ7n4|gx)k=Kf~AS%a}iv>S!u%W`D%_l00_`0_GdUH8CcMwmZ zh5UjX*pA%9H?~cWWg?LRGFrk$wN9_#n;$=h z@V`iGK3#Qe;s@g{bS5m!!dDks5!@c^ z7T*C?f66iUSfPRtq1^cl&vX=JdrL)`StIXIqT$eHCP@BDAi8!w3&yT^Tun%Z%RVeQ znYtYu>GiC;s-FM2=M7gR6f>#K&x84fZ|(%a;?cvepocqxbQhL}XA=qefNe{()#6#` z%6qm%C!AsR4BS76{i*nhSf~J1dNLBpIX$hvX)w6?M;<~hoEURrLKs1ARo4=i`J^y##fOH4vC3v z=UkVG^@RkH0l-r3ZjY`y-o>=*Q>zp&_3|ZU5=2(P=>+7FC>CIn4vft}gkJgLWp!Gz zcv-UWdAh{gG6Cl&qdtYp)vgc_5MeSi{Cs+{~X-z)7Kj(DrL&a&@hyN1FH_&pIPJgX{WVgNI}8iu0H`d!XZkCoMx7^ z-d@Y}o`Etr?9{Z6VrR#7%RIiaKO_PvBJK$xJU@h)lz0m34DTUODJ1@EX)s0%_gVlK2@=cX8rwJcO-E@hTDfy^uT|vIIV*H^y~U+dR1Fo*&Z1mAJ}| z5mbqFiaNe0^ca=Tz&2?rp*5{g(&zc|TWCm^EDyjZi&Y3)@(4c7BwqBrjt@B_j4S**JG~( z2z7MADdx-K>%NX*25L{+f=j_(G+xX(l-XFDi;HV1g@ajgX)gd1y&JT4GZ3Lo4tNu} z=u0+KGb)oT5b(tk+#05qKR-Q*Op&iUrgCyulbPy6`2Y(*`T+E8c&xPTW6~TaB8W*q z0Wu_8Oqw*^aC3fMgLP4sodG7YO_uY?MXo!%h#*<~v}*(3!2Ae1KR~9nzV}*#kA!#7 z##SK`v!V5*d?PN3UrBxR21J5FlG&v+b3@Q2{`?f4<;5>F*bN$ChquZNoRFy zExvE*iW6&3yVAi7?XFL2h!+oK4fpnH1A9@m`!Nzo6e1Ay{>H8bK^@1f_!Dy+#3&T1 zlT=|pbSL;bN31;}VsDJ1rjTN(L>lntB2GNAmLZtrimI<*fk_dKFHuqq2z+piaeu&v z_zrEyLD0TUnhjnbU4Fy`byI}o>k|xl4PDB%#-jveqnOxx!JgHb4W4fS2|^8C6lqPa z60t-CXMA06@mNo;2TNnxn9sDn4Wut5EDevsG%&*IcC{?wT zh)%#2s2&vceG=*7b68SL4_E+>cXk;4X}L@$h9|VGyN4wi&Pb(}dj8S3-cOyC6@5e6+?3 z552wVtp@U%ZAXQ3hHPD9|ACf{_U#JE8dkL$2jAD5!u#3z_9?$0XuAl4jwvB$?RkrS&dgMwJzFG)Oli9b0u@u`MULpGkqwQjc^MP5mwlk{I z?f7j#$P)F?NxA3|Y)`;$;U^JgxO^=a_xJ?*LrSoE1nYr4TZ2_@NxaGrEHSfjp#}C` 
zorWM4-mJ0aI3v;DuH#Z42?{$*ty~D z)QBcz=w2K(9ixtYVG13MLZY6-qB)y8SP{l+AO zpK|@ASz38jFjn2x96)sxkVAt(>{3C58bBG3%B%|MM| zl*zA!58f^Hb$xK|D-TSO&8+qg-`YH|Ui+j5t!O(3fJsDWR%tsH;n8U@dI{~YD4Ns} z6OUrbJu)@6Kv~Iv2$HhsX_X;c#(G0VN*%x|%UyIfq3qPFP>KK#)-PI(_ZzF>F7Oc8 zWk1&~U6yiJ9Ni)g6kKobH3Oux8M_gGfl=pblbIUz74i$$)=hXZ2>ODDnxn?EV2zna z{~!*Z5b0uYpKh;CUMR8ha=VD|3&k*3A~e#x@9VUnzl;q8j0*VcV~RgH_+M1#zsvvq zwBqmjf0C%bisgUF)_ae?_5bAU|E~Tg0r&6S>%I^6xB9>Fu7B76vx@(Vru$0_-o5J| z-T#ZU`@8a=!1&+Ct^@r~;{&NU_M{0k6#GlUbYrXv?tN8!#>XVlSdp9ir0QG(dz0>E?gul-I E2gmPX7XSbN literal 0 HcmV?d00001 From 4a90b8b2042d7b0a52219768e9e0ebe0f27af905 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 17:07:06 -0800 Subject: [PATCH 02/10] updates with rich boxes and yes no setup --- solo_server/main.py | 327 +++++++++++++++++++------------------------- 1 file changed, 143 insertions(+), 184 deletions(-) diff --git a/solo_server/main.py b/solo_server/main.py index 3c7f76b..e5786c7 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -1,197 +1,156 @@ -import os -import json +import time import typer -import subprocess -import shutil import click -import sys - -from enum import Enum -from solo_server.config import CONFIG_PATH -from solo_server.utils.docker_utils import start_docker_engine -from solo_server.utils.hardware import detect_hardware, display_hardware_info, recommended_server -from solo_server.utils.nvidia import check_nvidia_toolkit, install_nvidia_toolkit_linux, install_nvidia_toolkit_windows -from solo_server.simple_setup import run_command, detect_gpu from rich.console import Console from rich.panel import Panel +from rich.theme import Theme +from rich import box +from litgpt import LLM # Requires: pip install 'litgpt[all]' + +app = typer.Typer() + +# Define a custom neon blue theme +solo_theme = Theme({ + "info": "bold bright_blue", + "warning": "bold magenta", + "success": "bold bright_blue", + "panel.border": "bright_blue", + "panel.title": "bright_cyan" +}) +console = Console(theme=solo_theme) -class ServerType(str, Enum): - OLLAMA = "Ollama" - VLLM = "vLLM" - LLAMACPP = "Llama.cpp" +def detect_hardware(): + """ + Dummy hardware detection function. + Replace with your actual hardware detection logic. 
+ """ + cpu_model = "Intel i7" + cpu_cores = 8 + memory_gb = 16 # Example value + gpu_memory = 4 # Example value (in GB) + return cpu_model, cpu_cores, memory_gb, gpu_memory +def get_hardware_category(memory_gb: float) -> str: + if memory_gb < 8: + return "Fresh Adopter" + elif memory_gb < 16: + return "Mid Range" + elif memory_gb < 32: + return "High Performance" + else: + return "Maestro" + +@app.command() def setup(): - """Interactive setup for Solo Server environment""" - # Display hardware info - display_hardware_info(typer) - cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, gpu_memory, compute_backend, os_name = detect_hardware() + console.print("\n") - typer.echo("\nStarting Solo Server Setup...\n") - gpu = detect_gpu() - if gpu: - print("💻 Solo Sighting: GPU detected ->", gpu) - device_arg = "1" - else: - print("😎 Solo Mode: No GPU found; rocking CPU mode!") - device_arg = "0" + # Step 1: Hardware Detection & Categorization + typer.echo("Detecting hardware...") + cpu_model, cpu_cores, memory_gb, gpu_memory = detect_hardware() + hardware_category = get_hardware_category(memory_gb) + hardware_info = ( + f"CPU: {cpu_model} ({cpu_cores} cores)\n" + f"Memory: {memory_gb} GB\n" + f"GPU Memory: {gpu_memory} GB\n" + f"Category: {hardware_category}" + ) + console.print( + Panel(hardware_info, title="Hardware Info", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) - # Ask for installation type - install_type = typer.prompt("Choose installation type:", type=click.Choice(['simple', 'advanced'], case_sensitive=False)) - typer.echo(f"Selected installation type: {install_type}") - - if install_type == "simple": - # Define port to use - port = "5070" - device_arg = "0" - accelerator_arg = "cpu" - - console = Console() - console.print("Solo setup: Installing optimal inference engine, hold tight...") - run_command(["litgpt", "download", "HuggingFaceTB/SmolLM2-135M-Instruct"], - spinner_message="Solo download in progress: Grabbing lightest model...") - console.print("\n") - - - console.print(Panel.fit( - f"🎉 LIVE: solo server is now live!\n" - f"🔗 Swagger docs available at: http://localhost:{port}/docs", - title="Solo Server", border_style="blue")) + # Step 2: Core Initialization Prompt + init_prompt = ( + "Continue to solo core initialization?\n" + "Yes: Proceed with full initialization and model setup\n" + "No: Exit setup" + ) + console.print( + Panel(init_prompt, title="Core Initialization", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + if not typer.confirm("", default=True): + typer.echo("Exiting setup.") + raise typer.Exit() + + console.print("\n") + + # Step 3: Model Selection & Download Simulation + model_map = { + "Fresh Adopter": "SmolLM2-135M", + "Mid Range": "Qwen2.5-0.5B", + "High Performance": "microsoft/phi-2", + "Maestro": "Deepseek-r1" + } + selected_model = model_map.get(hardware_category, "SmolLM2-135M") + with console.status(f"Downloading model {selected_model}...", spinner="dots", spinner_style="bold bright_blue"): + time.sleep(3) # Simulate download delay + typer.echo(f"Model {selected_model} download complete.") + + console.print("\n") + + # Step 4: Advanced Modules Prompt + adv_prompt = ( + "Load advanced modules?\n" + "Yes: Load additional functionalities and module packs\n" + "No: Skip advanced modules" + ) + console.print( + Panel(adv_prompt, title="Advanced Modules", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + if typer.confirm("", default=True): + module_pack_info = ( + "Choose module pack:\n" + "pro - Pro 
Pack: RAG, OCR, and Voice Models\n" + "industrial - Industrial Pack: CV, Search, and Video Models\n" + "robotics - Robotics Pack: ROS, OpenEMMA, and Advanced Models\n" + "custom ensemble - Custom Ensemble: Paid option for tailored modules\n" + "Enter your choice:" + ) console.print( - f"curl -X POST http://127.0.0.1:{port}/predict -H 'Content-Type: application/json' -d '{{\"prompt\": \"hello Solo\"}}'") - - command = [ - "litgpt", - "serve", - "HuggingFaceTB/SmolLM2-135M-Instruct", - "--port", port, - "--devices", device_arg, - "--accelerator", accelerator_arg - ] - - process = subprocess.Popen(command) - print(f"Command is running in the background with PID: {process.pid}") - else: - # Original code - recmd_server = recommended_server(memory_gb, gpu_vendor, gpu_memory) - - def server_type_prompt(value: str) -> ServerType: - normalized_value = value.lower() - for server in ServerType: - if server.value.lower() == normalized_value: - return server - raise typer.BadParameter(f"Invalid server type: {value}") - - server_choice = typer.prompt( - "\nChoose server", - type=server_type_prompt, - default=recmd_server, + Panel(module_pack_info, title="Module Pack Options", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) ) - - # GPU Configuration - use_gpu = False - if gpu_vendor in ["NVIDIA", "AMD", "Intel", "Apple Silicon"]: - use_gpu = True - if use_gpu and gpu_vendor == "NVIDIA": - if not check_nvidia_toolkit(os_name): - if typer.confirm("NVIDIA GPU Detected, but GPU drivers not found. Install now?", default=True): - if os_name == "Linux": - try: - install_nvidia_toolkit_linux() - except subprocess.CalledProcessError as e: - typer.echo(f"Failed to install NVIDIA toolkit: {e}", err=True) - use_gpu = False - elif os_name == "Windows": - try: - install_nvidia_toolkit_windows() - except subprocess.CalledProcessError as e: - typer.echo(f"Failed to install NVIDIA toolkit: {e}", err=True) - use_gpu = False - else: - typer.echo("Falling back to CPU inference.") - use_gpu = False - - # Save GPU configuration to config file - config = {} - if os.path.exists(CONFIG_PATH): - with open(CONFIG_PATH, 'r') as f: - config = json.load(f) - config['hardware'] = {'use_gpu': use_gpu} - with open(CONFIG_PATH, 'w') as f: - json.dump(config, f, indent=4) - - # Docker Engine Check for Docker-based servers - if server_choice in [ServerType.OLLAMA, ServerType.VLLM]: - # Check Docker installation - docker_path = shutil.which("docker") - if not docker_path: - typer.echo("Docker is not installed or not in the system PATH. Please install Docker first.\n", err=True) - typer.secho("Install Here: https://docs.docker.com/get-docker/", fg=typer.colors.GREEN) - raise typer.Exit(code=1) - - - try: - subprocess.run(["docker", "info"], check=True, capture_output=True, timeout=20) - except subprocess.CalledProcessError: - typer.echo("Docker daemon is not running. 
Attempting to start Docker...", err=True) - if not start_docker_engine(os_name): - raise typer.Exit(code=1) - # Re-check if Docker is running - try: - subprocess.run(["docker", "info"], check=True, capture_output=True, timeout=20) - except subprocess.CalledProcessError: - typer.echo("Try restarting the terminal with admin privileges and close any instances of podman.", err=True) - raise typer.Exit(code=1) - - - - # Server setup - try: - if server_choice == ServerType.VLLM: - # pull the appropriate vLLM image - typer.echo("Pulling vLLM image...") - if gpu_vendor == "NVIDIA" and use_gpu: - subprocess.run(["docker", "pull", "vllm/vllm-openai:latest"], check=True) - elif gpu_vendor == "AMD" and use_gpu: - subprocess.run(["docker", "pull", "rocm/vllm"], check=True) - elif cpu_model and "Apple" in cpu_model: - subprocess.run(["docker", "pull", "getsolo/vllm-arm"], check=True) - elif cpu_model and any(vendor in cpu_model for vendor in ["Intel", "AMD"]): - subprocess.run(["docker", "pull", "getsolo/vllm-cpu"], check=True) - else: - typer.echo("vLLM currently does not support your machine", err=True) - return False - - typer.secho( - "Solo server vLLM setup complete! Use 'solo serve -s vllm -m MODEL_NAME' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - - elif server_choice == ServerType.OLLAMA: - # Just pull the Ollama image - typer.echo("Pulling Ollama image...") - if gpu_vendor == "AMD" and use_gpu: - subprocess.run(["docker", "pull", "ollama/ollama-rocm"], check=True) - else: - subprocess.run(["docker", "pull", "ollama/ollama"], check=True) - - typer.secho( - "Solo server ollama setup complete! \nUse 'solo serve -s ollama -m MODEL_NAME' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - - elif server_choice == ServerType.LLAMACPP: - from solo_server.utils.server_utils import setup_llama_cpp_server - setup_success = setup_llama_cpp_server(use_gpu, gpu_vendor, os_name, install_only=True) - if setup_success: - typer.secho( - "Solo server llama.cpp setup complete! 
Use 'solo serve -s llama.cpp -m MODEL_PATH' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - else: - typer.echo("Failed to setup llama.cpp", err=True) - except Exception as e: - typer.echo(f"\nSetup failed: {e}", err=True) - raise typer.Exit(code=1) + module_pack = typer.prompt("", type=click.Choice(["pro", "industrial", "robotics", "custom ensemble"], case_sensitive=False), default="pro") + typer.echo(f"Module pack selected: {module_pack}") + else: + typer.echo("Skipping advanced modules.") + + console.print("\n") + console.print( + Panel("Solo core initialization complete!", title="Setup Complete", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + console.print("\n") + + # Step 5: Load the LLM using litgpt + console.print( + Panel(f"Loading LLM model: {selected_model}", title="LLM Load", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + try: + llm = LLM.load(selected_model) + typer.echo("LLM loaded successfully.") + except Exception as e: + typer.echo(f"Failed to load LLM: {e}") + raise typer.Exit() + + # Step 6: Start the server on port 5070 + console.print( + Panel(f"Starting server on port 5070 with model: {selected_model}", title="Server", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + try: + llm.serve(port=5070) + except Exception as e: + typer.echo(f"Failed to start server: {e}") + + # Step 7: Optionally Generate Text + prompt_text = typer.prompt( + "Enter a prompt to generate text (default: 'Fix the spelling: Every fall, the familly goes to the mountains.')", + default="Fix the spelling: Every fall, the familly goes to the mountains." + ) + typer.echo("Generating text...") + try: + generated_text = llm.generate(prompt_text) + typer.echo("\nGenerated text:") + typer.echo(generated_text) + except Exception as e: + typer.echo(f"Failed to generate text: {e}") if __name__ == "__main__": - typer.run(setup) \ No newline at end of file + app() From d5ed1687296a85cfc25be8234f51eb5a57ac66b9 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 17:19:17 -0800 Subject: [PATCH 03/10] add docker management steps --- solo_server/ensemble.yaml | 9 +++ solo_server/main.py | 113 +++++++++++++++++++++++++++++++++++--- 2 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 solo_server/ensemble.yaml diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml new file mode 100644 index 0000000..61f0516 --- /dev/null +++ b/solo_server/ensemble.yaml @@ -0,0 +1,9 @@ +advanced_modules: true +hardware: + category: High Performance + cpu_cores: 8 + cpu_model: Intel i7 + gpu_memory: 4 + memory_gb: 16 +module_pack: pro +selected_model: microsoft/phi-2 diff --git a/solo_server/main.py b/solo_server/main.py index e5786c7..72c73b4 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -1,6 +1,8 @@ import time +import subprocess import typer import click +import yaml from rich.console import Console from rich.panel import Panel from rich.theme import Theme @@ -40,6 +42,73 @@ def get_hardware_category(memory_gb: float) -> str: else: return "Maestro" +def build_docker_ensemble(module_pack: str): + """ + Build an ensemble of Docker images for the selected module pack. + The Dockerfiles are organized in subfolders within the "containers" folder. + + Adjust this dictionary to match the folders in your "containers/" directory + and how you want them grouped by module pack. 
+ """ + docker_modules = { + # Example grouping (adjust as needed): + "pro": [ + "rag", + "langchain", + "Transformers" + ], + "industrial": [ + "PyTorch", + "Tensorflow", + "vLLM" + ], + "robotics": [ + "ROS", + "LeRobot", + "OpenEMMA" + ], + # You can place additional folders here for a "custom ensemble" + "custom ensemble": [ + "Browser Use", + "Computer Use", + "Cosmos", + "homeassistant-core", + "JAX", + "LITA", + "llama-index" + ] + } + + modules = docker_modules.get(module_pack.lower(), []) + if not modules: + console.print(f"[magenta]No modules found for the '{module_pack}' pack. Adjust your dictionary as needed.[/magenta]") + return + + for module in modules: + console.print(f"[bright_blue]Building Docker image for module:[/bright_blue] {module}") + + # Replace spaces in the module name when creating the image tag + image_tag = module.lower().replace(' ', '-') + + # If your folder name has spaces, you may need to quote or escape them. + # Here we assume your OS can handle the direct string (Linux usually can with a directory rename). + build_path = f"./containers/{module}" + + try: + subprocess.run( + [ + "docker", + "build", + "-t", f"ensemble/{image_tag}", + build_path + ], + check=True, + capture_output=True + ) + console.print(f"[bright_cyan]Successfully built image for:[/bright_cyan] {module}") + except subprocess.CalledProcessError as e: + console.print(f"[warning]Docker build failed for module {module}: {e}[/warning]") + @app.command() def setup(): console.print("\n") @@ -96,13 +165,15 @@ def setup(): console.print( Panel(adv_prompt, title="Advanced Modules", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) ) - if typer.confirm("", default=True): + advanced_modules = typer.confirm("", default=True) + module_pack = None + if advanced_modules: module_pack_info = ( "Choose module pack:\n" - "pro - Pro Pack: RAG, OCR, and Voice Models\n" - "industrial - Industrial Pack: CV, Search, and Video Models\n" - "robotics - Robotics Pack: ROS, OpenEMMA, and Advanced Models\n" - "custom ensemble - Custom Ensemble: Paid option for tailored modules\n" + "pro - Pro Pack: RAG, LangChain, Transformers\n" + "industrial - Industrial Pack: PyTorch, Tensorflow, vLLM\n" + "robotics - Robotics Pack: ROS, LeRobot, OpenEMMA\n" + "custom ensemble - Custom Ensemble: A variety of additional containers\n" "Enter your choice:" ) console.print( @@ -113,13 +184,39 @@ def setup(): else: typer.echo("Skipping advanced modules.") + console.print("\n") + + # Step 5: Save Setup Information to ensemble.yaml + setup_info = { + "hardware": { + "cpu_model": cpu_model, + "cpu_cores": cpu_cores, + "memory_gb": memory_gb, + "gpu_memory": gpu_memory, + "category": hardware_category + }, + "selected_model": selected_model, + "advanced_modules": advanced_modules, + "module_pack": module_pack + } + with open("ensemble.yaml", "w") as f: + yaml.dump(setup_info, f) + typer.echo("Setup information saved to ensemble.yaml.") + + # Step 6: If advanced modules enabled, start Docker ensemble builds + if advanced_modules and module_pack: + console.print( + Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + build_docker_ensemble(module_pack) + console.print("\n") console.print( Panel("Solo core initialization complete!", title="Setup Complete", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) ) console.print("\n") - # Step 5: Load the LLM using litgpt + # Step 7: Load the LLM using litgpt console.print( 
Panel(f"Loading LLM model: {selected_model}", title="LLM Load", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) ) @@ -130,7 +227,7 @@ def setup(): typer.echo(f"Failed to load LLM: {e}") raise typer.Exit() - # Step 6: Start the server on port 5070 + # Step 8: Start the server on port 5070 console.print( Panel(f"Starting server on port 5070 with model: {selected_model}", title="Server", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) ) @@ -139,7 +236,7 @@ def setup(): except Exception as e: typer.echo(f"Failed to start server: {e}") - # Step 7: Optionally Generate Text + # Step 9: Optionally Generate Text prompt_text = typer.prompt( "Enter a prompt to generate text (default: 'Fix the spelling: Every fall, the familly goes to the mountains.')", default="Fix the spelling: Every fall, the familly goes to the mountains." From 95fe815b8ce4462095370835a1799b21d21ef8fa Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 17:29:52 -0800 Subject: [PATCH 04/10] pr clean up --- solo_server/commands/recommend.py | 140 ------------------ .../solodocs/Bug Report Template.docx | Bin 9123 -> 0 bytes 2 files changed, 140 deletions(-) delete mode 100644 solo_server/commands/recommend.py delete mode 100644 solo_server/commands/solodocs/Bug Report Template.docx diff --git a/solo_server/commands/recommend.py b/solo_server/commands/recommend.py deleted file mode 100644 index 19d4eec..0000000 --- a/solo_server/commands/recommend.py +++ /dev/null @@ -1,140 +0,0 @@ -from txtai import RAG -from litgpt import LLM -from rich.console import Console -import os - -import nltk -nltk.download(['punkt', 'punkt_tab']) - -from txtai.pipeline import Textractor - -from txtai import Embeddings - - -# Create Textractor -textractor = Textractor() -text = textractor("solodocs/solo-server/solo_server/commands/solodocs/Bug Report Template.docx") -print(text) - - -def stream(path): - for f in sorted(os.listdir(path)): - fpath = os.path.join(path, f) - - # Only accept documents - if f.endswith(("docx", "xlsx", "pdf")): - print(f"Indexing {fpath}") - for paragraph in textractor(fpath): - yield paragraph - -# Document text extraction, split into paragraphs - -# Vector Database -embeddings = Embeddings(content=True) -embeddings.index(stream("solodocs")) - -console = Console() - -def recommend_based_on_docs(query: str): - """ - Generate a recommendation answer (with citations) based on the documentation context using a RAG pipeline. - - The function uses the following documentation context: - - "As we discussed before, this is important when dealing with large volumes of data. Not all of the data can be added to a LLM prompt. Additionally, having only the most relevant context helps the LLM generate higher quality answers. - - Citations for LLMs - A healthy level of skepticism should be applied to answers generated by AI. We're far from the day where we can blindly trust answers from an AI model. - - txtai has a couple approaches for generating citations. The basic approach is to take the answer and search the vector database for the closest match. 
- - for x in embeddings.search(result): - print(x['text']) - - E5-base-v2 - Image Captions BLIP - Labels - Zero Shot BART-Large-MNLI - Model Guide - |Component |Model(s)|Date Added| - |---|---|---| - |Embeddings |all-MiniLM-L6-v2|2022-04-15| - |Image Captions |BLIP|2022-03-17| - |Labels - Zero Shot |BART-Large-MNLI|2022-01-01| - |Large Language Model (LLM) |Mistral 7B OpenOrca|2023-10-01| - |Summarization |DistilBART|2021-02-22| - |Text-to-Speech |ESPnet JETS|2022-08-01| - |Transcription |Whisper|2022-08-01| - |Translation |OPUS Model Series|2021-04-06| - - While the basic approach above works in this case, txtai has a more robust pipeline to handle citations and references. - The RAG pipeline is defined below. A RAG pipeline works in the same way as a LLM + Vector Search pipeline, except it has special logic for generating citations. - This pipeline takes the answers and compares it to the context passed to the LLM to determine the most likely reference." - - The function prints the generated answer and the corresponding citation. - """ - # Documentation context to guide the answer - docs_context = ( - "As we discussed before, this is important when dealing with large volumes of data. Not all of the data can be added to a LLM prompt. " - "Additionally, having only the most relevant context helps the LLM generate higher quality answers.\n\n" - "Citations for LLMs:\n" - "A healthy level of skepticism should be applied to answers generated by AI. We're far from the day where we can blindly trust answers from an AI model.\n\n" - "txtai has a couple approaches for generating citations. The basic approach is to take the answer and search the vector database for the closest match.\n\n" - "for x in embeddings.search(result):\n print(x['text'])\n\n" - "E5-base-v2\n" - "Image Captions BLIP\n" - "Labels - Zero Shot BART-Large-MNLI\n\n" - "Model Guide\n" - "|Component |Model(s)|Date Added|\n" - "|---|---|---|\n" - "|Embeddings |all-MiniLM-L6-v2|2022-04-15|\n" - "|Image Captions |BLIP|2022-03-17|\n" - "|Labels - Zero Shot |BART-Large-MNLI|2022-01-01|\n" - "|Large Language Model (LLM) |Mistral 7B OpenOrca|2023-10-01|\n" - "|Summarization |DistilBART|2021-02-22|\n" - "|Text-to-Speech |ESPnet JETS|2022-08-01|\n" - "|Transcription |Whisper|2022-08-01|\n" - "|Translation |OPUS Model Series|2021-04-06|\n\n" - "While the basic approach above works in this case, txtai has a more robust pipeline to handle citations and references.\n\n" - "The RAG pipeline is defined below. A RAG pipeline works in the same way as a LLM + Vector Search pipeline, except it has special logic for generating citations. " - "This pipeline takes the answers and compares it to the context passed to the LLM to determine the most likely reference." - ) - - # Create a prompt that injects the documentation context - def prompt_with_context(question: str): - return [{ - "query": question, - "question": f""" -Answer the following question using only the context below. Only include information specifically discussed. - -question: {question} -context: -{docs_context} -""" - }] - - # Create the LLM instance with a system prompt template. - llm = LLM("TheBloke/Mistral-7B-OpenOrca-AWQ") - - # Create the RAG instance using txtai; the output mode "reference" will provide a reference id. - rag = RAG(embeddings, llm, output="reference") - - # Query the RAG pipeline with the prompt that includes the docs context. 
- result = rag(prompt_with_context(query), maxlength=4096, pad_token_id=32000)[0] - - console.print("ANSWER:", style="bold cyan") - console.print(result["answer"], style="white") - - # Retrieve and print citation text using the reference from the result. - citation = embeddings.search( - "select id, text from txtai where id = :id", - limit=1, - parameters={"id": result["reference"]} - ) - console.print("CITATION:", style="bold cyan") - console.print(citation, style="white") - -# Example usage: -if __name__ == "__main__": - # Test the function with a sample recommendation query. - test_query = "recommend how can I optimize model performance based on the docs provided?" - recommend_based_on_docs(test_query) diff --git a/solo_server/commands/solodocs/Bug Report Template.docx b/solo_server/commands/solodocs/Bug Report Template.docx deleted file mode 100644 index 54ba636111ccc27b942c2c28fd63b091892700aa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9123 zcmaJ{1C$-zvOZ&58QZpPW{gZGGa1{;*tTukw#{Uiv2E*3a__r;?*HDs)obl@x>r~4 z-L-4i{;Ix~mj(ev1wcYV0;aqTKLP#_Vw2U(M3-RS{oofn8&K$=U4Y`-8v8;K&zn5s31qaSq;RlF>B++;T&^t zeYKYw$W4N6DhPl|&)+_}a^N5a*%M@r61$4SX(k@+?{&-E7b@oraQF3^qElcYZN6g* zCK9)+=n49#?dEuad) zUG*}}*eXhcC5{_IRc%j47-tX{OPvTyG!fZjw~ShWz}}Z~#RkmOHd5%c#DrYgHL3wa z&XIB(#|6U-lxI~Hi1+bZabr$wXeFfJ4vabMB7~rwfHsIez7LE2=pxKME{ma|206EO zbdndWU_F_G)61fbn?YhE4vm$`;+wIRG4%4%OEtpDCU`{VDTs_EVD<@6s|nZ%^a!J% zpy%{i8BAuwQJD0#1Rz)*N%h$El~6i=GOg%3k$;Ux`66u*oYMD|0E8D#ADOlTNM0HO z($9JlGZzQ|NC5``5woQGX;moYF_38Fr!iH3a?q=hH z6H&XJYao3|uK})b5}D`Qcz`!TI(&gz!s`zY>bX%m`pV2S+6WPBDpB~Dzz;M*0?0}V zyX$v@cRMKRA7H=&WT0bwQ5e@9o_6wwVXNoTnP9{b$+jpU;_InGiF~PgvcE-zKpN5H z?M}6n(-FhpWUES(H2Wl+Qs&;G=G(LZ@rvzc%@G|TNy|kih@sE5>eoQ6gZx^0|3}?2`Bw{xQW|}u3B`p$W zbEs*A6C}w)=#9tdEYsX$FlLPcU;KfggQG$~_!t7C_2Z{dEPVYj|F24PGszy)?E#PW zt}qnH-SrwJeUF>?9k`;k2{8W{svl;#faxlQ~upe5sM=nW=;d4Z+R+x5^9yj78WK(X6%TBiNH=R+F_rt>T2yG1zmCKRi;#P8{n5* zzOxA~JC}1&=3t^hBnDJnyAWc=!gceDKZSqaBX$Eee|vW~2uKm$dL;p106-k_zq#MP zB8;JxrM>c3U2}uq{ueK8x%`g^1Nn0~F8sTM|BzsG)=FI|3q(>>oJbNqEG2b^JEbPV zJUp<5mgMI7o1w%j79UTCS^P1Wi;1k9>S!EE55s6arQPM@-r!~i37^+SRv8i}C*()G zMs2*5vMREBl9BL9mQhJGpHYgNGGwgh8LqFoRaHfVeIIFx!GHd+>@IUe+)hTUO!dYWL| z%Ou?f2+NjU@6>WOqR57D>x(mMew5OFqMLA6uLBx`9{=QxoSf|i7-5M}xG&)zfjZa+ z7q1jy`GgZtPvF|X?yAcs1Sgm{v$8W`5!M{C^d}%4%u`^Ye`w2u% z+X|$t5|FwKxSI!%t?n@zD`(h|bcWYra@3qb%mRR(CmQ{%;+(Y3Iar>7=Vc#+<=ial zX-(4y0$`69qJ|qx{V(>&cfcpfsKS zg#?Qj!wL_JDjG(v7%!fQ8ljhvoabfY&zL_AP^qqTI2K|F<&xRX@+Wae9&YW}koQ_> ztn~tNnS0?pRq2QGxm8Rqg0_Vuk`gFF76?Fu3}vI}fN1eAN!oI#w6%=Vg*5{@$d=d< zrRKiH+PRDah(jzrI@DGA7(C0L&Jxxw!Q)KQf!xyySYzs}f{W}MeCYj=;Q${&zXkff z%J&5My?~pe1I`1nF$%L-RW!5QDV%Jtb1zsa)@C7?L~aEfEw7hFeetKDnR4<>LOeyL zX+GWV!OX7at^n)%UCcD2eWU4v48Anufm0A!>Q1zr&AJ=%m?k zk6>(fCpdelqSIcMCuQ7n4|gx)k=Kf~AS%a}iv>S!u%W`D%_l00_`0_GdUH8CcMwmZ zh5UjX*pA%9H?~cWWg?LRGFrk$wN9_#n;$=h z@V`iGK3#Qe;s@g{bS5m!!dDks5!@c^ z7T*C?f66iUSfPRtq1^cl&vX=JdrL)`StIXIqT$eHCP@BDAi8!w3&yT^Tun%Z%RVeQ znYtYu>GiC;s-FM2=M7gR6f>#K&x84fZ|(%a;?cvepocqxbQhL}XA=qefNe{()#6#` z%6qm%C!AsR4BS76{i*nhSf~J1dNLBpIX$hvX)w6?M;<~hoEURrLKs1ARo4=i`J^y##fOH4vC3v z=UkVG^@RkH0l-r3ZjY`y-o>=*Q>zp&_3|ZU5=2(P=>+7FC>CIn4vft}gkJgLWp!Gz zcv-UWdAh{gG6Cl&qdtYp)vgc_5MeSi{Cs+{~X-z)7Kj(DrL&a&@hyN1FH_&pIPJgX{WVgNI}8iu0H`d!XZkCoMx7^ z-d@Y}o`Etr?9{Z6VrR#7%RIiaKO_PvBJK$xJU@h)lz0m34DTUODJ1@EX)s0%_gVlK2@=cX8rwJcO-E@hTDfy^uT|vIIV*H^y~U+dR1Fo*&Z1mAJ}| z5mbqFiaNe0^ca=Tz&2?rp*5{g(&zc|TWCm^EDyjZi&Y3)@(4c7BwqBrjt@B_j4S**JG~( z2z7MADdx-K>%NX*25L{+f=j_(G+xX(l-XFDi;HV1g@ajgX)gd1y&JT4GZ3Lo4tNu} 
zSO)+t0@`m*6~h?`1utg{=bP55Mdu+nS^xxvRQt#|U>aLv+vpKd#c=g_GB??d<~%a8 zLiJQjMY^PN%SyXUuyI29AhfB^KNR7*g=_10ul+{QPtt=B0SzHUW25+0Li-(il^ep_ zB0Uh&F$+EhII?#T^0z#4bG}Dl?)J>cwYn`D{CCScZ*Y4b;tqd*i7Azs^YQw&(8fe}syx1-a@*DOpW)2uq;TeVT&HRuw@YBf)NMgCgX@!YIod5|E5 zWRW*RYVka73F~P#>HG%wts#2xxCd{SrPVf&X`BwlNp8hsYqp{l zN5W)ebQ4B<`=Fl$#H|QUrNMYQt>%>W>BV`|BaRUJ<156kOeX{^NV637z06wm-!dJX z|1EN8+ZveL(f^t6(EYkn9wTeo%Yf+rUQ-WRoUfV1mkLgkh1A#l2|{ssANYxwP*%$Q zDI|^r9(2|}@hte;)p*!kDn56R6>POrUe=H;qQt-t-g;=h(v>ajprI5~UJfBJVzGrd zgd+qTmco>U8@>=!AqBy~ajlh0Ktv9RUWOKu0~J5-a}21$kj-(o3m z>wT&!VGw`pU+^zhn7H_A8bzQ7pVdXBnKFVMP7zUa4D;6xDSjAwf-NP*Bw{Mu8K53m zOcr+jgq&^D7taS9tMa*wGiZ;LaNii=!^}^$piowbi<*5#XB=ynkXXLPME1pT&lPHz_dEV!wcnNspX7$`VSoi+N3jkZ;+7rtLNuKLBXkhm0Vr%1 zAdL%4I$z)F>j3(?jGyj|nosZ?$=Jh&od>guO$qk%)CNqz_coeVfh09JNDa@Tx_zLy zI!r%ugno9M(H9~DvkC5wvs+L~+4fV-8-cJlV0ttbr8D@>0{IOI>!6^bk);E>i(h7j z8oXK!F6t}PedNjDv;>dyxBC6dNEGd0qR=8n=BB5jBA5Eo9^f6?7w2gH-<2?0<4L0G z_xGTBuZI2SH+|2Q|3SCd8yi>{(En~3f2YJ6(pJlCIGqR!P6YN=28*hPpbjAm)yw6w z*d&&LcqYLK3-UPvGm>uDOIuz*y)9n@(H1NMziN7;FXTV5EOMFSPE}TQ;w4a~PUOlq z!j~hkNxrLCZy}kwn@|jT|iXj?v3DO)pzVt-k>X< zG=Lsr18Qy+F7}<+II5;@u5ip1wnLY67A!JYi$1nUNnhP_Qqpr<#4%amDXE?|YU|1u zh*KF|SDngvs=;(|ux6fqnG{J$+L*P)(zz9hvK8_b`7v#*=riGwA5X zWhF!Daul#|)um_zw0zu{HW60{J`xz(*2>yASGLLe8V>esS~zQ?Mtwg3yY{*3y{0f8 z?gF-!wy`4m!i~LhDg|~naZU|$|C;|J-ViO*TS#oH9n2nUl8K2_zi*M1xDD)ljs?h& zLUXhtjZbydmbT}vX&;0DhEA15%rqtn=@u~G9|jL53%1VcQ&0X7LynQ4s3GCvLsnwp zo~Bb2gt@&_i?7Y+>JxFw&)f1gI_(P~e&@+1uhnalrPO}^8rq5P9`fpPxe^+voqVA< z`1U$$)cNw5k6C2DepQM8c5!`iWySY)6&MJ9AJ8R?%a?@|y-&s|z|8~P3ky6XpBAo* zs9U0B-sj!NC}#q@p8-w1pNcY81vF2ng0PA-1uWB5y5)-I?eLH2u1fGk==^wz`)v~FAz8)V;?Stm>4W6806H?uIVHn$c*wNTca&1q@rV}E2-We zu6CimF;v7DkrdJ9tzxWb>qOKR(wpxi0pVx!{VF()EJagSQQ?lO4&H8|ae<_S$Un!G zY#VN@IpXKG>k6WHu!0d*AlEH%GX!PGG@F_jsdK&13}+wEpmeW$AhJWlQirVXR2soS zY&AnX=*AD1v??Sro9PYecCMTYn{aNQj_}D=8{ty7*OS~gh0#?2d6KDNhXQ@?c;g-& z2A}&=>lGQ^;4&!mrn{ZTTH0FwZd+N#WGCrlOI1039W&eXjM7~YQ$s0P*v}}9w*;Mx zwZMM!7^L{XXA-}!gcUw8pc4ZU);CZp8Xlr^>NbH2Pyl|`Uov5U*=*P;{@E34W;3y=VyDW<>Xh7#h7=rSiZaSjID#ju zo!`g&a%9PT)BEs2$C#{eHfP%QRhahpVJc=i$k}vw#QYP_mY!&z<3vl4bq`?nmDOtak2E$pPeLbp|JCQw8ku1Wm_-FLW}D+ zJ!CBw9AbgLG~TbzxaEf%krjPQ7_Tg5b+@2acjrZus|7OKJTweFI-N;3@+g&EPw<|h zH~y*YZ0^W|dyX5#M|JlNG}t*?B~H7U12kPT#fGxf^9c3(C*-p$9&XbKOB$Qoi_I@9 zixu7Trxz6s7}DS8P6-#yHzFzc+8}D!H(ZzvA3|hw5)c|h4bP*VVs(e~SCMVSWi+`$ zorWM4-mJ0aI3v;DuH#Z42?{$*ty~D z)QBcz=w2K(9ixtYVG13MLZY6-qB)y8SP{l+AO zpK|@ASz38jFjn2x96)sxkVAt(>{3C58bBG3%B%|MM| zl*zA!58f^Hb$xK|D-TSO&8+qg-`YH|Ui+j5t!O(3fJsDWR%tsH;n8U@dI{~YD4Ns} z6OUrbJu)@6Kv~Iv2$HhsX_X;c#(G0VN*%x|%UyIfq3qPFP>KK#)-PI(_ZzF>F7Oc8 zWk1&~U6yiJ9Ni)g6kKobH3Oux8M_gGfl=pblbIUz74i$$)=hXZ2>ODDnxn?EV2zna z{~!*Z5b0uYpKh;CUMR8ha=VD|3&k*3A~e#x@9VUnzl;q8j0*VcV~RgH_+M1#zsvvq zwBqmjf0C%bisgUF)_ae?_5bAU|E~Tg0r&6S>%I^6xB9>Fu7B76vx@(Vru$0_-o5J| z-T#ZU`@8a=!1&+Ct^@r~;{&NU_M{0k6#GlUbYrXv?tN8!#>XVlSdp9ir0QG(dz0>E?gul-I E2gmPX7XSbN From cba51fd60e21fa39ebb4bd3826c9c151c621e739 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 18:30:53 -0800 Subject: [PATCH 05/10] update setup recommend --- solo_server/ensemble.yaml | 3 +- solo_server/main.py | 311 ++++++++++++++++++++++---------------- 2 files changed, 186 insertions(+), 128 deletions(-) diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml index 61f0516..49fbede 100644 --- a/solo_server/ensemble.yaml +++ b/solo_server/ensemble.yaml @@ -5,5 +5,6 @@ hardware: cpu_model: Intel i7 gpu_memory: 4 memory_gb: 16 +model_choice: null module_pack: pro -selected_model: microsoft/phi-2 +selected_model: microsoft/phi-4 diff --git 
a/solo_server/main.py b/solo_server/main.py index 72c73b4..e3cb6e7 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -7,19 +7,40 @@ from rich.panel import Panel from rich.theme import Theme from rich import box -from litgpt import LLM # Requires: pip install 'litgpt[all]' -app = typer.Typer() +app = typer.Typer(help="Solo Server Setup CLI\nA polished CLI for hardware detection, model initialization, and advanced module loading.") -# Define a custom neon blue theme -solo_theme = Theme({ - "info": "bold bright_blue", - "warning": "bold magenta", - "success": "bold bright_blue", +# Define a Google-inspired theme (blue, red, yellow, green) +google_theme = Theme({ + "header": "bold #4285F4", # Google Blue + "info": "bold #4285F4", # Google Blue + "warning": "bold #DB4437", # Google Red + "success": "bold #0F9D58", # Google Green + "prompt": "bold #F4B400", # Google Yellow "panel.border": "bright_blue", - "panel.title": "bright_cyan" + "panel.title": "bold white" }) -console = Console(theme=solo_theme) +console = Console(theme=google_theme) + +# Model options mapping (based on your table) +# Here we assume the "smallest fastest" option for each family: +MODEL_OPTIONS = { + "llama3": "meta-llama/Llama-3.1-1B-Instruct", # Smallest variant from Llama 3 family + "code_llama": "meta-llama/Code-Llama-7B", # Smallest variant for Code Llama + "codegemma": "google/CodeGemma-7B", # Only one variant for CodeGemma + "gemma2": "google/Gemma2-2B", # Smallest variant for Gemma 2 + "phi4": "microsoft/phi-4", # Only one option for Phi 4 (14B) + "qwen2.5": "qwen2.5/0.5B", # Smallest variant for Qwen2.5 + "qwen2.5_coder": "qwen2.5-coder/0.5B", # Smallest variant for Qwen2.5 Coder + "r1_distill_llama": "deepseek-ai/R1-Distill-Llama-8B" # Smallest variant for R1 Distill Llama +} + +def print_banner(): + """Display a header banner for the Solo Server CLI.""" + banner_text = """ + + """ + console.print(Panel(banner_text, style="header", border_style="panel.border", title="SOLO SERVER INIT", box=box.DOUBLE)) def detect_hardware(): """ @@ -42,16 +63,76 @@ def get_hardware_category(memory_gb: float) -> str: else: return "Maestro" -def build_docker_ensemble(module_pack: str): +def auto_select_model(hardware_category: str) -> str: + """ + Auto-select a default model based on hardware category. + For each situation, we recommend the smallest and fastest model available. + """ + mapping = { + "Fresh Adopter": MODEL_OPTIONS["llama3"], + "Mid Range": MODEL_OPTIONS["code_llama"], + "High Performance": MODEL_OPTIONS["phi4"], + "Maestro": MODEL_OPTIONS["r1_distill_llama"] + } + return mapping.get(hardware_category, MODEL_OPTIONS["llama3"]) + +def simulate_model_download(selected_model: str, sleep_time: int = 3): + """ + Simulate model download with a delay. + """ + with console.status(f"[info]Downloading model {selected_model}...[/info]", spinner="dots"): + time.sleep(sleep_time) # Simulate download delay + return f"[success]Model {selected_model} download complete.[/success]" + +def prompt_core_initialization(confirm_fn=typer.confirm) -> bool: + """ + Ask user to confirm core initialization. 
+ """ + init_prompt = ( + "Continue to solo core initialization?\n" + "Yes: Proceed with full initialization and model setup\n" + "No: Exit setup" + ) + console.print( + Panel(init_prompt, title="Core Initialization", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) + ) + return confirm_fn("", default=True) + +def prompt_advanced_modules(confirm_fn=typer.confirm, prompt_fn=typer.prompt) -> (bool, str): + """ + Ask user if they want to load advanced modules and select module pack if yes. + Returns a tuple (advanced_modules, module_pack) + """ + adv_prompt = ( + "Load advanced modules?\n" + "Yes: Load additional functionalities and module packs\n" + "No: Skip advanced modules" + ) + console.print( + Panel(adv_prompt, title="Advanced Modules", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) + ) + advanced_modules = confirm_fn("", default=True) + module_pack = None + if advanced_modules: + module_pack_info = ( + "Choose module pack:\n" + "pro - Pro Pack: RAG, LangChain, Transformers\n" + "industrial - Industrial Pack: PyTorch, Tensorflow, vLLM\n" + "robotics - Robotics Pack: ROS, LeRobot, OpenEMMA\n" + "custom ensemble - Custom Ensemble: Additional containers\n" + "Enter your choice:" + ) + console.print( + Panel(module_pack_info, title="Module Pack Options", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) + ) + module_pack = prompt_fn("", type=click.Choice(["pro", "industrial", "robotics", "custom ensemble"], case_sensitive=False), default="pro") + return advanced_modules, module_pack + +def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): """ Build an ensemble of Docker images for the selected module pack. - The Dockerfiles are organized in subfolders within the "containers" folder. - - Adjust this dictionary to match the folders in your "containers/" directory - and how you want them grouped by module pack. """ docker_modules = { - # Example grouping (adjust as needed): "pro": [ "rag", "langchain", @@ -67,7 +148,6 @@ def build_docker_ensemble(module_pack: str): "LeRobot", "OpenEMMA" ], - # You can place additional folders here for a "custom ensemble" "custom ensemble": [ "Browser Use", "Computer Use", @@ -78,24 +158,17 @@ def build_docker_ensemble(module_pack: str): "llama-index" ] } - modules = docker_modules.get(module_pack.lower(), []) if not modules: - console.print(f"[magenta]No modules found for the '{module_pack}' pack. Adjust your dictionary as needed.[/magenta]") + console.print(f"[warning]No modules found for the '{module_pack}' pack. Adjust your dictionary as needed.[/warning]") return - + for module in modules: - console.print(f"[bright_blue]Building Docker image for module:[/bright_blue] {module}") - - # Replace spaces in the module name when creating the image tag + console.print(f"[info]Building Docker image for module:[/info] {module}") image_tag = module.lower().replace(' ', '-') - - # If your folder name has spaces, you may need to quote or escape them. - # Here we assume your OS can handle the direct string (Linux usually can with a directory rename). 
build_path = f"./containers/{module}" - try: - subprocess.run( + run_subprocess_fn( [ "docker", "build", @@ -105,149 +178,133 @@ def build_docker_ensemble(module_pack: str): check=True, capture_output=True ) - console.print(f"[bright_cyan]Successfully built image for:[/bright_cyan] {module}") + console.print(f"[success]Successfully built image for:[/success] {module}") except subprocess.CalledProcessError as e: console.print(f"[warning]Docker build failed for module {module}: {e}[/warning]") +def save_setup_info(setup_info: dict, filename: str = "ensemble.yaml"): + """ + Save setup information to a YAML file. + """ + with open(filename, "w") as f: + yaml.dump(setup_info, f) + return f"[success]Setup information saved to {filename}.[/success]" + +def serve_model(model: str, port: int = 5070, run_subprocess_fn=subprocess.run) -> str: + """ + Serve the model using the LitGPT CLI syntax. + Example: litgpt serve meta-llama/Llama-3.1-1B-Instruct --port 5070 + """ + try: + cmd = ["litgpt", "serve", model, "--port", str(port)] + run_subprocess_fn(cmd, check=True) + return f"[success]Server started on port {port} with model: {model}[/success]" + except subprocess.CalledProcessError as e: + return f"[warning]Failed to start server: {e}[/warning]" + +def get_hardware_info(): + """ + Get hardware information and categorization. + """ + cpu_model, cpu_cores, memory_gb, gpu_memory = detect_hardware() + hardware_category = get_hardware_category(memory_gb) + hardware_info = { + "cpu_model": cpu_model, + "cpu_cores": cpu_cores, + "memory_gb": memory_gb, + "gpu_memory": gpu_memory, + "category": hardware_category + } + return hardware_info + @app.command() -def setup(): +def setup( + model_choice: str = typer.Option( + None, + "--model", + "-m", + help="Optional model choice. 
Options: " + ", ".join(MODEL_OPTIONS.keys()) + ) +): + """Run the full solo server setup.""" console.print("\n") - + print_banner() + console.print("\n") + # Step 1: Hardware Detection & Categorization - typer.echo("Detecting hardware...") - cpu_model, cpu_cores, memory_gb, gpu_memory = detect_hardware() - hardware_category = get_hardware_category(memory_gb) - hardware_info = ( - f"CPU: {cpu_model} ({cpu_cores} cores)\n" - f"Memory: {memory_gb} GB\n" - f"GPU Memory: {gpu_memory} GB\n" - f"Category: {hardware_category}" + typer.echo("[info]Detecting hardware...[/info]") + hardware_info = get_hardware_info() + hardware_info_str = ( + f"CPU: {hardware_info['cpu_model']} ({hardware_info['cpu_cores']} cores)\n" + f"Memory: {hardware_info['memory_gb']} GB\n" + f"GPU Memory: {hardware_info['gpu_memory']} GB\n" + f"Category: {hardware_info['category']}" ) console.print( - Panel(hardware_info, title="Hardware Info", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + Panel(hardware_info_str, title="Hardware Info", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) ) # Step 2: Core Initialization Prompt - init_prompt = ( - "Continue to solo core initialization?\n" - "Yes: Proceed with full initialization and model setup\n" - "No: Exit setup" - ) - console.print( - Panel(init_prompt, title="Core Initialization", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) - ) - if not typer.confirm("", default=True): - typer.echo("Exiting setup.") + if not prompt_core_initialization(): + typer.echo("[warning]Exiting setup.[/warning]") raise typer.Exit() console.print("\n") # Step 3: Model Selection & Download Simulation - model_map = { - "Fresh Adopter": "SmolLM2-135M", - "Mid Range": "Qwen2.5-0.5B", - "High Performance": "microsoft/phi-2", - "Maestro": "Deepseek-r1" - } - selected_model = model_map.get(hardware_category, "SmolLM2-135M") - with console.status(f"Downloading model {selected_model}...", spinner="dots", spinner_style="bold bright_blue"): - time.sleep(3) # Simulate download delay - typer.echo(f"Model {selected_model} download complete.") + if model_choice: + # Use user provided model option if valid + selected_model = MODEL_OPTIONS.get(model_choice.lower()) + if not selected_model: + typer.echo(f"[warning]Invalid model choice: {model_choice}. 
Falling back to auto-selection.[/warning]") + selected_model = auto_select_model(hardware_info['category']) + else: + selected_model = auto_select_model(hardware_info['category']) + + download_message = simulate_model_download(selected_model) + typer.echo(download_message) console.print("\n") # Step 4: Advanced Modules Prompt - adv_prompt = ( - "Load advanced modules?\n" - "Yes: Load additional functionalities and module packs\n" - "No: Skip advanced modules" - ) - console.print( - Panel(adv_prompt, title="Advanced Modules", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) - ) - advanced_modules = typer.confirm("", default=True) - module_pack = None + advanced_modules, module_pack = prompt_advanced_modules() if advanced_modules: - module_pack_info = ( - "Choose module pack:\n" - "pro - Pro Pack: RAG, LangChain, Transformers\n" - "industrial - Industrial Pack: PyTorch, Tensorflow, vLLM\n" - "robotics - Robotics Pack: ROS, LeRobot, OpenEMMA\n" - "custom ensemble - Custom Ensemble: A variety of additional containers\n" - "Enter your choice:" - ) - console.print( - Panel(module_pack_info, title="Module Pack Options", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) - ) - module_pack = typer.prompt("", type=click.Choice(["pro", "industrial", "robotics", "custom ensemble"], case_sensitive=False), default="pro") - typer.echo(f"Module pack selected: {module_pack}") + typer.echo(f"[info]Module pack selected: {module_pack}[/info]") else: - typer.echo("Skipping advanced modules.") + typer.echo("[info]Skipping advanced modules.[/info]") console.print("\n") # Step 5: Save Setup Information to ensemble.yaml setup_info = { - "hardware": { - "cpu_model": cpu_model, - "cpu_cores": cpu_cores, - "memory_gb": memory_gb, - "gpu_memory": gpu_memory, - "category": hardware_category - }, + "hardware": hardware_info, "selected_model": selected_model, "advanced_modules": advanced_modules, - "module_pack": module_pack + "module_pack": module_pack, + "model_choice": model_choice } - with open("ensemble.yaml", "w") as f: - yaml.dump(setup_info, f) - typer.echo("Setup information saved to ensemble.yaml.") + save_message = save_setup_info(setup_info) + typer.echo(save_message) - # Step 6: If advanced modules enabled, start Docker ensemble builds + # Step 6: Docker Ensemble Build for Advanced Modules if advanced_modules and module_pack: console.print( - Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) ) build_docker_ensemble(module_pack) console.print("\n") console.print( - Panel("Solo core initialization complete!", title="Setup Complete", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + Panel("Solo core initialization complete!", title="Setup Complete", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) ) console.print("\n") - # Step 7: Load the LLM using litgpt - console.print( - Panel(f"Loading LLM model: {selected_model}", title="LLM Load", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) - ) - try: - llm = LLM.load(selected_model) - typer.echo("LLM loaded successfully.") - except Exception as e: - typer.echo(f"Failed to load LLM: {e}") - raise typer.Exit() - - # Step 8: Start the server on port 5070 + # Step 7: Serve the Model using LitGPT CLI syntax console.print( - Panel(f"Starting server on port 5070 with model: 
{selected_model}", title="Server", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) - ) - try: - llm.serve(port=5070) - except Exception as e: - typer.echo(f"Failed to start server: {e}") - - # Step 9: Optionally Generate Text - prompt_text = typer.prompt( - "Enter a prompt to generate text (default: 'Fix the spelling: Every fall, the familly goes to the mountains.')", - default="Fix the spelling: Every fall, the familly goes to the mountains." + Panel(f"Starting server on port 5070 with model: {selected_model}", title="Server", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) ) - typer.echo("Generating text...") - try: - generated_text = llm.generate(prompt_text) - typer.echo("\nGenerated text:") - typer.echo(generated_text) - except Exception as e: - typer.echo(f"Failed to generate text: {e}") + server_message = serve_model(selected_model, port=5070) + typer.echo(server_message) if __name__ == "__main__": app() From e2c2797190f720b320fbaf7d74572e224b0cb392 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 19:07:01 -0800 Subject: [PATCH 06/10] add updated containers --- solo_server/ensemble.yaml | 12 ++- solo_server/main.py | 180 +++++++++++++++++++------------------- 2 files changed, 101 insertions(+), 91 deletions(-) diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml index 49fbede..eb52ced 100644 --- a/solo_server/ensemble.yaml +++ b/solo_server/ensemble.yaml @@ -1,10 +1,20 @@ advanced_modules: true +checkpoint_dir: checkpoints/HuggingFaceTB/SmolLM2-1.7B-Instruct +devices: 1 hardware: category: High Performance cpu_cores: 8 cpu_model: Intel i7 gpu_memory: 4 memory_gb: 16 +max_new_tokens: 50 model_choice: null module_pack: pro -selected_model: microsoft/phi-4 +port: 5070 +precision: null +quantize: null +selected_model: HuggingFaceTB/SmolLM2-1.7B-Instruct +stream: false +temperature: 0.8 +top_k: 50 +top_p: 1.0 diff --git a/solo_server/main.py b/solo_server/main.py index e3cb6e7..b501371 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -1,8 +1,10 @@ import time import subprocess +import socket import typer import click import yaml +from pathlib import Path from rich.console import Console from rich.panel import Panel from rich.theme import Theme @@ -10,35 +12,29 @@ app = typer.Typer(help="Solo Server Setup CLI\nA polished CLI for hardware detection, model initialization, and advanced module loading.") -# Define a Google-inspired theme (blue, red, yellow, green) +# Google-inspired theme google_theme = Theme({ - "header": "bold #4285F4", # Google Blue - "info": "bold #4285F4", # Google Blue - "warning": "bold #DB4437", # Google Red - "success": "bold #0F9D58", # Google Green - "prompt": "bold #F4B400", # Google Yellow + "header": "bold #4285F4", + "info": "bold #4285F4", + "warning": "bold #DB4437", + "success": "bold #0F9D58", "panel.border": "bright_blue", "panel.title": "bold white" }) console = Console(theme=google_theme) -# Model options mapping (based on your table) -# Here we assume the "smallest fastest" option for each family: -MODEL_OPTIONS = { - "llama3": "meta-llama/Llama-3.1-1B-Instruct", # Smallest variant from Llama 3 family - "code_llama": "meta-llama/Code-Llama-7B", # Smallest variant for Code Llama - "codegemma": "google/CodeGemma-7B", # Only one variant for CodeGemma - "gemma2": "google/Gemma2-2B", # Smallest variant for Gemma 2 - "phi4": "microsoft/phi-4", # Only one option for Phi 4 (14B) - "qwen2.5": "qwen2.5/0.5B", # Smallest variant for Qwen2.5 - "qwen2.5_coder": "qwen2.5-coder/0.5B", # Smallest variant 
for Qwen2.5 Coder - "r1_distill_llama": "deepseek-ai/R1-Distill-Llama-8B" # Smallest variant for R1 Distill Llama -} +# Hard-coded model and starting port +MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct" +START_PORT = 5070 def print_banner(): """Display a header banner for the Solo Server CLI.""" banner_text = """ - + ___ _ __ __ _ + / _ \(_)___ ___ / /___ / /_(_) + / , _/ / _ \/ -_) / / __/ / __/ / + /_/|_/_/ .__/\__/ /_/\__/ \__/_/ + /_/ """ console.print(Panel(banner_text, style="header", border_style="panel.border", title="SOLO SERVER INIT", box=box.DOUBLE)) @@ -63,26 +59,13 @@ def get_hardware_category(memory_gb: float) -> str: else: return "Maestro" -def auto_select_model(hardware_category: str) -> str: - """ - Auto-select a default model based on hardware category. - For each situation, we recommend the smallest and fastest model available. - """ - mapping = { - "Fresh Adopter": MODEL_OPTIONS["llama3"], - "Mid Range": MODEL_OPTIONS["code_llama"], - "High Performance": MODEL_OPTIONS["phi4"], - "Maestro": MODEL_OPTIONS["r1_distill_llama"] - } - return mapping.get(hardware_category, MODEL_OPTIONS["llama3"]) - -def simulate_model_download(selected_model: str, sleep_time: int = 3): +def simulate_model_download(model: str, sleep_time: int = 3) -> str: """ Simulate model download with a delay. """ - with console.status(f"[info]Downloading model {selected_model}...[/info]", spinner="dots"): - time.sleep(sleep_time) # Simulate download delay - return f"[success]Model {selected_model} download complete.[/success]" + with console.status(f"[info]Downloading model {model}...[/info]", spinner="dots"): + time.sleep(sleep_time) + return f"[success]Model {model} download complete.[/success]" def prompt_core_initialization(confirm_fn=typer.confirm) -> bool: """ @@ -131,6 +114,8 @@ def prompt_advanced_modules(confirm_fn=typer.confirm, prompt_fn=typer.prompt) -> def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): """ Build an ensemble of Docker images for the selected module pack. + Checks if the Dockerfile directory exists. + Adjusted to use the path: commands/containers/ """ docker_modules = { "pro": [ @@ -164,17 +149,16 @@ def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): return for module in modules: + # Update the build path to use the relative path from main.py. + build_path = Path("commands") / "containers" / module + if not build_path.exists(): + console.print(f"[warning]Path {build_path} does not exist. Skipping module {module}.[/warning]") + continue console.print(f"[info]Building Docker image for module:[/info] {module}") image_tag = module.lower().replace(' ', '-') - build_path = f"./containers/{module}" try: run_subprocess_fn( - [ - "docker", - "build", - "-t", f"ensemble/{image_tag}", - build_path - ], + ["docker", "build", "-t", f"ensemble/{image_tag}", str(build_path)], check=True, capture_output=True ) @@ -182,7 +166,7 @@ def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): except subprocess.CalledProcessError as e: console.print(f"[warning]Docker build failed for module {module}: {e}[/warning]") -def save_setup_info(setup_info: dict, filename: str = "ensemble.yaml"): +def save_setup_info(setup_info: dict, filename: str = "ensemble.yaml") -> str: """ Save setup information to a YAML file. 
""" @@ -190,40 +174,62 @@ def save_setup_info(setup_info: dict, filename: str = "ensemble.yaml"): yaml.dump(setup_info, f) return f"[success]Setup information saved to {filename}.[/success]" -def serve_model(model: str, port: int = 5070, run_subprocess_fn=subprocess.run) -> str: +def get_available_port(start_port: int) -> int: + """ + Return the first available port starting from start_port. + """ + port = start_port + while True: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + try: + s.bind(("", port)) + return port + except OSError: + port += 1 + +def serve_model(model: str, port: int, run_subprocess_fn=subprocess.run) -> (str, int): """ Serve the model using the LitGPT CLI syntax. - Example: litgpt serve meta-llama/Llama-3.1-1B-Instruct --port 5070 + If the given port is in use, automatically increment to the next available port. + Returns a tuple of the success message and the port used. """ + available_port = get_available_port(port) try: - cmd = ["litgpt", "serve", model, "--port", str(port)] - run_subprocess_fn(cmd, check=True) - return f"[success]Server started on port {port} with model: {model}[/success]" + cmd = ["litgpt", "serve", model, "--port", str(available_port)] + run_subprocess_fn(cmd, check=True, capture_output=True, text=True) + success_msg = f"[success]Server started on port {available_port} with model: {model}[/success]" + # Print a sample curl command for testing. + test_curl = f"curl http://localhost:{available_port}/" + console.print(f"[info]You can test the server with: {test_curl}[/info]") + return success_msg, available_port except subprocess.CalledProcessError as e: - return f"[warning]Failed to start server: {e}[/warning]" + error_output = e.stderr.strip() if e.stderr else str(e) + console.print(f"ERROR: {error_output}") + return f"[warning]Failed to start server: {e}[/warning]", available_port -def get_hardware_info(): +def get_hardware_info() -> dict: """ Get hardware information and categorization. """ cpu_model, cpu_cores, memory_gb, gpu_memory = detect_hardware() hardware_category = get_hardware_category(memory_gb) - hardware_info = { + return { "cpu_model": cpu_model, "cpu_cores": cpu_cores, "memory_gb": memory_gb, "gpu_memory": gpu_memory, "category": hardware_category } - return hardware_info @app.command() def setup( + # Although the original flow allowed a model_choice, + # we now always use HuggingFaceTB/SmolLM2-1.7B-Instruct. model_choice: str = typer.Option( None, "--model", "-m", - help="Optional model choice. 
Options: " + ", ".join(MODEL_OPTIONS.keys()) + help="Optional model choice (ignored in this setup; always uses HuggingFaceTB/SmolLM2-1.7B-Instruct)" ) ): """Run the full solo server setup.""" @@ -232,7 +238,7 @@ def setup( console.print("\n") # Step 1: Hardware Detection & Categorization - typer.echo("[info]Detecting hardware...[/info]") + console.print("[info]Detecting hardware...[/info]") hardware_info = get_hardware_info() hardware_info_str = ( f"CPU: {hardware_info['cpu_model']} ({hardware_info['cpu_cores']} cores)\n" @@ -240,71 +246,65 @@ def setup( f"GPU Memory: {hardware_info['gpu_memory']} GB\n" f"Category: {hardware_info['category']}" ) - console.print( - Panel(hardware_info_str, title="Hardware Info", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) + console.print(Panel(hardware_info_str, title="Hardware Info", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) # Step 2: Core Initialization Prompt if not prompt_core_initialization(): - typer.echo("[warning]Exiting setup.[/warning]") + console.print("[warning]Exiting setup.[/warning]") raise typer.Exit() console.print("\n") - # Step 3: Model Selection & Download Simulation - if model_choice: - # Use user provided model option if valid - selected_model = MODEL_OPTIONS.get(model_choice.lower()) - if not selected_model: - typer.echo(f"[warning]Invalid model choice: {model_choice}. Falling back to auto-selection.[/warning]") - selected_model = auto_select_model(hardware_info['category']) - else: - selected_model = auto_select_model(hardware_info['category']) - - download_message = simulate_model_download(selected_model) - typer.echo(download_message) + # Step 3: Model Download Simulation (always uses the specified model) + download_message = simulate_model_download(MODEL) + console.print(download_message) console.print("\n") - # Step 4: Advanced Modules Prompt + # Step 4: Advanced Modules Prompt (optional) advanced_modules, module_pack = prompt_advanced_modules() if advanced_modules: - typer.echo(f"[info]Module pack selected: {module_pack}[/info]") + console.print(f"[info]Module pack selected: {module_pack}[/info]") else: - typer.echo("[info]Skipping advanced modules.[/info]") + console.print("[info]Skipping advanced modules.[/info]") console.print("\n") - # Step 5: Save Setup Information to ensemble.yaml + # Step 5: Save Setup Information to YAML and print config details setup_info = { + "checkpoint_dir": str(Path("checkpoints") / MODEL), + "devices": 1, + "max_new_tokens": 50, + "port": START_PORT, # initial port, actual port may change + "precision": None, + "quantize": None, + "stream": False, + "temperature": 0.8, + "top_k": 50, + "top_p": 1.0, + "selected_model": MODEL, "hardware": hardware_info, - "selected_model": selected_model, "advanced_modules": advanced_modules, "module_pack": module_pack, "model_choice": model_choice } save_message = save_setup_info(setup_info) - typer.echo(save_message) + console.print(save_message) + console.print(setup_info) - # Step 6: Docker Ensemble Build for Advanced Modules + # Step 6: Docker Ensemble Build for Advanced Modules (if enabled) if advanced_modules and module_pack: - console.print( - Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) + console.print(Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) build_docker_ensemble(module_pack) console.print("\n") - console.print( 
- Panel("Solo core initialization complete!", title="Setup Complete", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) + console.print(Panel("Solo core initialization complete!", title="Setup Complete", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) console.print("\n") - # Step 7: Serve the Model using LitGPT CLI syntax - console.print( - Panel(f"Starting server on port 5070 with model: {selected_model}", title="Server", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) - server_message = serve_model(selected_model, port=5070) - typer.echo(server_message) + # Step 7: Serve the Model using LitGPT CLI syntax and capture errors gracefully + console.print(Panel(f"Starting server with model: {MODEL}", title="Server", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + server_message, used_port = serve_model(MODEL, port=START_PORT) + console.print(server_message) if __name__ == "__main__": app() From b23fb75f799d9800ae2dff4665deaf3cf1c8d039 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 19:21:50 -0800 Subject: [PATCH 07/10] changes to docker assemblage --- solo_server/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solo_server/main.py b/solo_server/main.py index b501371..6bf1e6a 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -158,7 +158,7 @@ def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): image_tag = module.lower().replace(' ', '-') try: run_subprocess_fn( - ["docker", "build", "-t", f"ensemble/{image_tag}", str(build_path)], + ["docker", "build", "-t", f"containers/{image_tag}", str(build_path)], check=True, capture_output=True ) From b784628f0fceb0580203d9c2a6d637485448efa2 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 19:33:46 -0800 Subject: [PATCH 08/10] add ensemble categories --- solo_server/ensemble.yaml | 2 +- solo_server/main.py | 116 +++++++++++++++++++------------------- 2 files changed, 60 insertions(+), 58 deletions(-) diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml index eb52ced..dc0979f 100644 --- a/solo_server/ensemble.yaml +++ b/solo_server/ensemble.yaml @@ -9,7 +9,7 @@ hardware: memory_gb: 16 max_new_tokens: 50 model_choice: null -module_pack: pro +module_pack: lean ensemble port: 5070 precision: null quantize: null diff --git a/solo_server/main.py b/solo_server/main.py index 6bf1e6a..55e0405 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -5,6 +5,7 @@ import click import yaml from pathlib import Path +from tqdm import tqdm from rich.console import Console from rich.panel import Panel from rich.theme import Theme @@ -61,10 +62,11 @@ def get_hardware_category(memory_gb: float) -> str: def simulate_model_download(model: str, sleep_time: int = 3) -> str: """ - Simulate model download with a delay. + Simulate model download with a progress bar. + (sleep_time is in seconds; e.g., 3 sec ~ 0.05 mins) """ - with console.status(f"[info]Downloading model {model}...[/info]", spinner="dots"): - time.sleep(sleep_time) + for _ in tqdm(range(sleep_time), desc="Downloading model (est. 
{:.2f} mins)".format(sleep_time/60), unit="sec", total=sleep_time): + time.sleep(1) return f"[success]Model {model} download complete.[/success]" def prompt_core_initialization(confirm_fn=typer.confirm) -> bool: @@ -76,80 +78,77 @@ def prompt_core_initialization(confirm_fn=typer.confirm) -> bool: "Yes: Proceed with full initialization and model setup\n" "No: Exit setup" ) - console.print( - Panel(init_prompt, title="Core Initialization", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) + console.print(Panel(init_prompt, title="Core Initialization", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) return confirm_fn("", default=True) +def test_downloaded_model(model: str, run_subprocess_fn=subprocess.run) -> str: + """ + Prompt the user for a test prompt (defaulting to 'solo @@ test') and use the LitGPT CLI + to generate sample output from the downloaded model. + A progress bar shows the testing duration. + """ + test_prompt = typer.prompt("Enter test prompt", default="solo @@ test") + console.print(f"[info]Testing model {model} with prompt: '{test_prompt}'[/info]") + for _ in tqdm(range(5), desc="Testing model (est. 0.08 mins)", unit="sec", total=5): + time.sleep(1) + try: + # Assuming the LitGPT CLI provides a generate command. + cmd = ["litgpt", "generate", model, "--prompt", test_prompt] + result = run_subprocess_fn(cmd, check=True, capture_output=True, text=True) + output = result.stdout.strip() + console.print(f"[success]Test generation output:[/success]\n{output}") + return output + except subprocess.CalledProcessError as e: + error_output = e.stderr.strip() if e.stderr else str(e) + console.print(f"[warning]Test generation failed: {error_output}[/warning]") + return "" + def prompt_advanced_modules(confirm_fn=typer.confirm, prompt_fn=typer.prompt) -> (bool, str): """ - Ask user if they want to load advanced modules and select module pack if yes. + Ask user if they want to load advanced modules and select a vertical. + New verticals include: secure enterprise, healthcare, robotics, and lean ensemble. 
Returns a tuple (advanced_modules, module_pack) """ adv_prompt = ( "Load advanced modules?\n" - "Yes: Load additional functionalities and module packs\n" + "Yes: Load additional functionalities for a vertical\n" "No: Skip advanced modules" ) - console.print( - Panel(adv_prompt, title="Advanced Modules", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) + console.print(Panel(adv_prompt, title="Advanced Modules", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) advanced_modules = confirm_fn("", default=True) module_pack = None if advanced_modules: module_pack_info = ( - "Choose module pack:\n" - "pro - Pro Pack: RAG, LangChain, Transformers\n" - "industrial - Industrial Pack: PyTorch, Tensorflow, vLLM\n" - "robotics - Robotics Pack: ROS, LeRobot, OpenEMMA\n" - "custom ensemble - Custom Ensemble: Additional containers\n" + "Choose advanced vertical:\n" + "secure enterprise - Modules for security and compliance\n" + "healthcare - Modules for healthcare applications\n" + "robotics - Modules for robotics integration\n" + "lean ensemble - A lean set of general modules\n" "Enter your choice:" ) - console.print( - Panel(module_pack_info, title="Module Pack Options", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) - module_pack = prompt_fn("", type=click.Choice(["pro", "industrial", "robotics", "custom ensemble"], case_sensitive=False), default="pro") + console.print(Panel(module_pack_info, title="Vertical Options", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + module_pack = prompt_fn("", type=click.Choice(["secure enterprise", "healthcare", "robotics", "lean ensemble"], case_sensitive=False), default="lean ensemble") return advanced_modules, module_pack def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): """ - Build an ensemble of Docker images for the selected module pack. - Checks if the Dockerfile directory exists. - Adjusted to use the path: commands/containers/ + Build an ensemble of Docker images for the selected vertical. + Uses the path: commands/containers/ (relative to main.py). + A tqdm progress bar shows the estimated duration. """ - docker_modules = { - "pro": [ - "rag", - "langchain", - "Transformers" - ], - "industrial": [ - "PyTorch", - "Tensorflow", - "vLLM" - ], - "robotics": [ - "ROS", - "LeRobot", - "OpenEMMA" - ], - "custom ensemble": [ - "Browser Use", - "Computer Use", - "Cosmos", - "homeassistant-core", - "JAX", - "LITA", - "llama-index" - ] + # New advanced module packs for different verticals + advanced_module_packs = { + "secure enterprise": ["auth", "data-encryption", "audit-log"], + "healthcare": ["hl7", "fhir-connector", "secure-patient"], + "robotics": ["ros", "le-robot", "robotics-core"], + "lean ensemble": ["microservice", "edge-ai", "light-transformers"] } - modules = docker_modules.get(module_pack.lower(), []) + modules = advanced_module_packs.get(module_pack.lower(), []) if not modules: - console.print(f"[warning]No modules found for the '{module_pack}' pack. Adjust your dictionary as needed.[/warning]") + console.print(f"[warning]No modules found for vertical '{module_pack}'.[/warning]") return - for module in modules: - # Update the build path to use the relative path from main.py. + for module in tqdm(modules, desc="Building Docker images (est. 2 mins/module)", unit="module", total=len(modules)): build_path = Path("commands") / "containers" / module if not build_path.exists(): console.print(f"[warning]Path {build_path} does not exist. 
Skipping module {module}.[/warning]") @@ -158,7 +157,7 @@ def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): image_tag = module.lower().replace(' ', '-') try: run_subprocess_fn( - ["docker", "build", "-t", f"containers/{image_tag}", str(build_path)], + ["docker", "build", "-t", f"ensemble/{image_tag}", str(build_path)], check=True, capture_output=True ) @@ -198,7 +197,6 @@ def serve_model(model: str, port: int, run_subprocess_fn=subprocess.run) -> (str cmd = ["litgpt", "serve", model, "--port", str(available_port)] run_subprocess_fn(cmd, check=True, capture_output=True, text=True) success_msg = f"[success]Server started on port {available_port} with model: {model}[/success]" - # Print a sample curl command for testing. test_curl = f"curl http://localhost:{available_port}/" console.print(f"[info]You can test the server with: {test_curl}[/info]") return success_msg, available_port @@ -223,8 +221,6 @@ def get_hardware_info() -> dict: @app.command() def setup( - # Although the original flow allowed a model_choice, - # we now always use HuggingFaceTB/SmolLM2-1.7B-Instruct. model_choice: str = typer.Option( None, "--model", @@ -261,10 +257,16 @@ def setup( console.print("\n") + # NEW STEP: Test the downloaded model using the solo @@ structure + console.print(Panel("Testing downloaded model...", title="Test Model", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + test_output = test_downloaded_model(MODEL) + + console.print("\n") + # Step 4: Advanced Modules Prompt (optional) advanced_modules, module_pack = prompt_advanced_modules() if advanced_modules: - console.print(f"[info]Module pack selected: {module_pack}[/info]") + console.print(f"[info]Vertical selected: {module_pack}[/info]") else: console.print("[info]Skipping advanced modules.[/info]") From adbc81cadf0b4c8bb6d6aa46d4bb2294fe8087cf Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 19:47:05 -0800 Subject: [PATCH 09/10] over complicated setup --- solo_server/commands/query.py | 68 +++++++++++++++++++++++++++++++++++ solo_server/ensemble.yaml | 2 +- solo_server/main.py | 42 +++++++++++++++++++--- 3 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 solo_server/commands/query.py diff --git a/solo_server/commands/query.py b/solo_server/commands/query.py new file mode 100644 index 0000000..ab5a5e2 --- /dev/null +++ b/solo_server/commands/query.py @@ -0,0 +1,68 @@ +import sys +import typer +import requests +from litgpt import LLM +from rich.console import Console + +console = Console() + +CORE_SERVER_PORT = 5070 # Change this if your core server runs on a different port +CORE_SERVER_URL = f"http://localhost:{CORE_SERVER_PORT}/generate" + +def redirect_to_core_server(query: str, port: int = CORE_SERVER_PORT) -> None: + """ + Redirect the given query to the core server via an HTTP POST request. + """ + url = f"http://localhost:{port}/generate" + try: + response = requests.post(url, json={"prompt": query}) + response.raise_for_status() + console.print("[success]Response from core server:[/success]") + console.print(response.text) + except Exception as e: + console.print(f"[warning]Error redirecting to core server: {e}[/warning]") + +def query_llm(query: str) -> None: + """ + If the query exceeds 9000 characters, show an error. + Otherwise, load the model and generate a response. + """ + if len(query) > 9000: + typer.echo("Error: Your query exceeds the maximum allowed length of 9000 characters. 
It's over 9000!") + raise typer.Exit(1) + + # Load the model and generate a response while showing a spinner + llm = LLM.load("Qwen/Qwen2.5-1.5B-Instruct") + with console.status("Generating response...", spinner="dots"): + response = llm.generate(query) + typer.echo(response) + +def interactive_mode(): + console.print("Interactive Mode (type 'exit' or 'quit' to end):", style="bold green") + while True: + query_text = input(">> ").strip() + if query_text.lower() in ("exit", "quit"): + break + # If the query starts with "solo @@", redirect to the core server + if query_text.startswith("solo @@"): + # Remove the "solo @@" prefix before sending the query + core_query = query_text[len("solo @@"):].strip() + redirect_to_core_server(core_query) + else: + query_llm(query_text) + +if __name__ == "__main__": + # If invoked with "@@" as the first argument, treat the rest as the query. + # Otherwise, launch interactive mode. + if len(sys.argv) > 1 and sys.argv[1] == "@@": + if len(sys.argv) > 2: + query_text = " ".join(sys.argv[2:]).strip() + else: + typer.echo("Enter your query (end with EOF / Ctrl-D):") + query_text = sys.stdin.read().strip() + # If the query starts with "solo @@", remove that prefix. + if query_text.startswith("solo @@"): + query_text = query_text[len("solo @@"):].strip() + redirect_to_core_server(query_text) + else: + interactive_mode() diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml index dc0979f..ded92a2 100644 --- a/solo_server/ensemble.yaml +++ b/solo_server/ensemble.yaml @@ -9,7 +9,7 @@ hardware: memory_gb: 16 max_new_tokens: 50 model_choice: null -module_pack: lean ensemble +module_pack: robotics port: 5070 precision: null quantize: null diff --git a/solo_server/main.py b/solo_server/main.py index 55e0405..05acc6a 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -1,6 +1,7 @@ import time import subprocess import socket +import sys import typer import click import yaml @@ -11,7 +12,11 @@ from rich.theme import Theme from rich import box -app = typer.Typer(help="Solo Server Setup CLI\nA polished CLI for hardware detection, model initialization, and advanced module loading.") +import commands.query + +app = typer.Typer( + help="Solo Server Setup CLI\nA polished CLI for hardware detection, model initialization, advanced module loading, and query redirection." +) # Google-inspired theme google_theme = Theme({ @@ -32,9 +37,9 @@ def print_banner(): """Display a header banner for the Solo Server CLI.""" banner_text = """ ___ _ __ __ _ - / _ \(_)___ ___ / /___ / /_(_) - / , _/ / _ \/ -_) / / __/ / __/ / - /_/|_/_/ .__/\__/ /_/\__/ \__/_/ + / _ \\(_)___ ___ / /___ / /_(_) + / , _/ / _ \\/ -_) / / __/ / __/ / + /_/|_/_/ .__/\\__/ /_/\\__/ \\__/_/ /_/ """ console.print(Panel(banner_text, style="header", border_style="panel.border", title="SOLO SERVER INIT", box=box.DOUBLE)) @@ -65,7 +70,9 @@ def simulate_model_download(model: str, sleep_time: int = 3) -> str: Simulate model download with a progress bar. (sleep_time is in seconds; e.g., 3 sec ~ 0.05 mins) """ - for _ in tqdm(range(sleep_time), desc="Downloading model (est. {:.2f} mins)".format(sleep_time/60), unit="sec", total=sleep_time): + for _ in tqdm(range(sleep_time), + desc="Downloading model (est. 
{:.2f} mins)".format(sleep_time/60), + unit="sec", total=sleep_time): time.sleep(1) return f"[success]Model {model} download complete.[/success]" @@ -308,5 +315,30 @@ def setup( server_message, used_port = serve_model(MODEL, port=START_PORT) console.print(server_message) +@app.command() +def query(query: str = typer.Argument( + None, + help="Query for the LLM. If omitted, interactive mode is launched." +)): + """ + Redirect queries to the appropriate functions in query.py. + If a query is provided, it is processed; otherwise, interactive mode is launched. + If the query starts with 'solo @@', the prefix is stripped and the core server is used. + """ + try: + from commands.query import query_llm, redirect_to_core_server, interactive_mode + except ModuleNotFoundError: + console.print("[warning]Module 'query' not found. Please ensure query.py is in the same directory.[/warning]") + raise typer.Exit(1) + + if query is None: + interactive_mode() + else: + if query.startswith("solo @@"): + core_query = query[len("solo @@"):].strip() + redirect_to_core_server(core_query) + else: + query_llm(query) + if __name__ == "__main__": app() From aefaacf00c8c0feea074fb1cf905472ec29acec7 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 20:32:00 -0800 Subject: [PATCH 10/10] add commands to consider for roadmap --- solo_server/advanced_cmd.py | 51 +++++++++++ solo_server/explorative_cmd.py | 155 +++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 solo_server/advanced_cmd.py create mode 100644 solo_server/explorative_cmd.py diff --git a/solo_server/advanced_cmd.py b/solo_server/advanced_cmd.py new file mode 100644 index 0000000..0ece57b --- /dev/null +++ b/solo_server/advanced_cmd.py @@ -0,0 +1,51 @@ +import typer +from rich.console import Console +from rich.panel import Panel + +app = typer.Typer(help="CLI for Advanced Model Operations and Model Export/Optimization") +console = Console() + +# ------------------------------- +# Advanced Model Operations Group +# ------------------------------- +advanced_app = typer.Typer(help="Commands for benchmarking, profiling, and stress testing your model.") +app.add_typer(advanced_app, name="advanced") + +@advanced_app.command("benchmark") +def benchmark(): + """Run performance benchmarks on the model.""" + console.print(Panel("Benchmark command executed", title="Benchmark", style="blue")) + +@advanced_app.command("profile") +def profile(): + """Profile model resource usage.""" + console.print(Panel("Profile command executed", title="Profile", style="blue")) + +@advanced_app.command("stress-test") +def stress_test(): + """Stress test the model and server under high-load conditions.""" + console.print(Panel("Stress-Test command executed", title="Stress Test", style="blue")) + +# ------------------------------- +# Model Export & Optimization Group +# ------------------------------- +optimization_app = typer.Typer(help="Commands for exporting, quantizing, and fine-tuning the model.") +app.add_typer(optimization_app, name="optimization") + +@optimization_app.command("export") +def export_model(): + """Export the model to various formats (e.g., ONNX, TensorRT, CoreML).""" + console.print(Panel("Export command executed", title="Export", style="green")) + +@optimization_app.command("quantize") +def quantize(): + """Apply quantization to reduce model size and improve efficiency.""" + console.print(Panel("Quantize command executed", title="Quantize", style="green")) + +@optimization_app.command("finetune") +def finetune(): + 
"""Fine-tune the model on custom datasets with specified hyperparameters.""" + console.print(Panel("Finetune command executed", title="Finetune", style="green")) + +if __name__ == "__main__": + app() diff --git a/solo_server/explorative_cmd.py b/solo_server/explorative_cmd.py new file mode 100644 index 0000000..93d5c29 --- /dev/null +++ b/solo_server/explorative_cmd.py @@ -0,0 +1,155 @@ +import typer +from rich.console import Console +from rich.panel import Panel + +app = typer.Typer(help="Solo CLI - A comprehensive tool for model management and server operations.") +console = Console() + +# --------------------------------- +# Setup Commands Group +# --------------------------------- +setup_app = typer.Typer(help="Commands for initializing and setting up the environment.") +app.add_typer(setup_app, name="setup") + +@setup_app.command("full") +def full_setup(): + """Run full server setup.""" + console.print(Panel("Full Setup executed", title="Setup", style="green")) + +@setup_app.command("init") +def init(): + """Reinitialize core components.""" + console.print(Panel("Init executed", title="Init", style="green")) + +# --------------------------------- +# Model Management Group +# --------------------------------- +model_app = typer.Typer(help="Manage model downloads, updates, and tests.") +app.add_typer(model_app, name="model") + +@model_app.command("download") +def download_model(): + """Download or update the model.""" + console.print(Panel("Download executed", title="Download", style="green")) + +@model_app.command("update") +def update_model(): + """Update the model to the latest version.""" + console.print(Panel("Update Model executed", title="Update Model", style="green")) + +@model_app.command("test") +def test_model(): + """Test the downloaded model with a sample prompt.""" + console.print(Panel("Test executed", title="Test", style="green")) + +# --------------------------------- +# Query & Interaction Group +# --------------------------------- +query_app = typer.Typer(help="Handle one-off queries or launch interactive mode.") +app.add_typer(query_app, name="query") + +@query_app.command("ask") +def ask(query: str = typer.Argument(..., help="Query for the model")): + """Send a query to the model.""" + # Check for "solo @@" prefix and adjust query if necessary + if query.startswith("solo @@"): + query = query[len("solo @@"):].strip() + console.print(Panel(f"Query: {query}", title="Query", style="green")) + +@query_app.command("interactive") +def interactive(): + """Launch interactive query mode.""" + console.print(Panel("Interactive mode launched", title="Interactive", style="green")) + # Add interactive loop logic here if desired + +# --------------------------------- +# Server Management Group +# --------------------------------- +server_app = typer.Typer(help="Commands for managing the model server.") +app.add_typer(server_app, name="server") + +@server_app.command("start") +def start_server(): + """Start or restart the model server.""" + console.print(Panel("Server started", title="Server", style="green")) + +@server_app.command("restart") +def restart_server(): + """Restart the server gracefully.""" + console.print(Panel("Server restarted", title="Restart", style="green")) + +@server_app.command("stop") +def stop_server(): + """Stop the running server.""" + console.print(Panel("Server stopped", title="Stop", style="green")) + +# --------------------------------- +# Diagnostics & Monitoring Group +# --------------------------------- +diag_app = typer.Typer(help="Commands for 
diagnostics and monitoring.") +app.add_typer(diag_app, name="diagnostics") + +@diag_app.command("status") +def status(): + """Display the current server status.""" + console.print(Panel("Status executed", title="Status", style="green")) + +@diag_app.command("logs") +def logs(): + """Display recent logs.""" + console.print(Panel("Logs executed", title="Logs", style="green")) + +@diag_app.command("health") +def healthcheck(): + """Perform a health check of the server.""" + console.print(Panel("Health check executed", title="Healthcheck", style="green")) + +@diag_app.command("diagnose") +def diagnose(): + """Run diagnostics to troubleshoot issues.""" + console.print(Panel("Diagnose executed", title="Diagnose", style="green")) + +# --------------------------------- +# Maintenance Group +# --------------------------------- +maint_app = typer.Typer(help="Maintenance and update commands.") +app.add_typer(maint_app, name="maintenance") + +@maint_app.command("update") +def update_cli(): + """Update the CLI or associated modules.""" + console.print(Panel("CLI Update executed", title="Update", style="green")) + +@maint_app.command("backup") +def backup(): + """Create backups of configuration and checkpoints.""" + console.print(Panel("Backup executed", title="Backup", style="green")) + +@maint_app.command("restore") +def restore(): + """Restore a backup configuration or model checkpoint.""" + console.print(Panel("Restore executed", title="Restore", style="green")) + +# --------------------------------- +# Configuration Group +# --------------------------------- +config_app = typer.Typer(help="View or modify configuration settings.") +app.add_typer(config_app, name="config") + +@config_app.command("set") +def set_config(): + """Set configuration parameters.""" + console.print(Panel("Config set executed", title="Config Set", style="green")) + +@config_app.command("info") +def config_info(): + """Display current configuration info.""" + console.print(Panel("Config info executed", title="Config Info", style="green")) + +@config_app.command("version") +def version(): + """Display the CLI version.""" + console.print(Panel("Version executed", title="Version", style="green")) + +if __name__ == "__main__": + app()
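
Reviewer note (not part of the patch series): the roadmap commands added in advanced_cmd.py and explorative_cmd.py only print Rich panels, so they can be smoke-tested without Docker, litgpt, or a running server. The sketch below shows one way to do that with Typer's CliRunner, e.g. under pytest; the import path solo_server.explorative_cmd and the test function name are assumptions about the package layout, not something the patches define.

# Hedged smoke test for the explorative CLI groups added in PATCH 10/10.
# Assumes solo_server is an importable package; adjust the import if the
# module is run as a standalone script instead.
from typer.testing import CliRunner

from solo_server.explorative_cmd import app

runner = CliRunner()

def test_explorative_groups_print_panels():
    # One representative subcommand per group; each should exit cleanly
    # and emit its status panel.
    for args in (
        ["setup", "full"],
        ["model", "download"],
        ["query", "ask", "solo @@ summarize the docs"],
        ["server", "start"],
        ["diagnostics", "status"],
        ["maintenance", "backup"],
        ["config", "version"],
    ):
        result = runner.invoke(app, args)
        assert result.exit_code == 0, result.output

Because "query ask" strips a leading "solo @@" before printing, the third invocation also exercises the same prefix convention used by the query command in main.py and commands/query.py.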