From 9beaf4b0cd1ab188c55deec7f136b3f6cb0a7f94 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 16:25:59 -0800 Subject: [PATCH 01/10] init commit with txtai rag for recommend --- solo_server/commands/recommend.py | 140 ++++++++++++++++++ .../solodocs/Bug Report Template.docx | Bin 0 -> 9123 bytes 2 files changed, 140 insertions(+) create mode 100644 solo_server/commands/recommend.py create mode 100644 solo_server/commands/solodocs/Bug Report Template.docx diff --git a/solo_server/commands/recommend.py b/solo_server/commands/recommend.py new file mode 100644 index 0000000..19d4eec --- /dev/null +++ b/solo_server/commands/recommend.py @@ -0,0 +1,140 @@ +from txtai import RAG +from litgpt import LLM +from rich.console import Console +import os + +import nltk +nltk.download(['punkt', 'punkt_tab']) + +from txtai.pipeline import Textractor + +from txtai import Embeddings + + +# Create Textractor +textractor = Textractor() +text = textractor("solodocs/solo-server/solo_server/commands/solodocs/Bug Report Template.docx") +print(text) + + +def stream(path): + for f in sorted(os.listdir(path)): + fpath = os.path.join(path, f) + + # Only accept documents + if f.endswith(("docx", "xlsx", "pdf")): + print(f"Indexing {fpath}") + for paragraph in textractor(fpath): + yield paragraph + +# Document text extraction, split into paragraphs + +# Vector Database +embeddings = Embeddings(content=True) +embeddings.index(stream("solodocs")) + +console = Console() + +def recommend_based_on_docs(query: str): + """ + Generate a recommendation answer (with citations) based on the documentation context using a RAG pipeline. + + The function uses the following documentation context: + + "As we discussed before, this is important when dealing with large volumes of data. Not all of the data can be added to a LLM prompt. Additionally, having only the most relevant context helps the LLM generate higher quality answers. + + Citations for LLMs + A healthy level of skepticism should be applied to answers generated by AI. We're far from the day where we can blindly trust answers from an AI model. + + txtai has a couple approaches for generating citations. The basic approach is to take the answer and search the vector database for the closest match. + + for x in embeddings.search(result): + print(x['text']) + + E5-base-v2 + Image Captions BLIP + Labels - Zero Shot BART-Large-MNLI + Model Guide + |Component |Model(s)|Date Added| + |---|---|---| + |Embeddings |all-MiniLM-L6-v2|2022-04-15| + |Image Captions |BLIP|2022-03-17| + |Labels - Zero Shot |BART-Large-MNLI|2022-01-01| + |Large Language Model (LLM) |Mistral 7B OpenOrca|2023-10-01| + |Summarization |DistilBART|2021-02-22| + |Text-to-Speech |ESPnet JETS|2022-08-01| + |Transcription |Whisper|2022-08-01| + |Translation |OPUS Model Series|2021-04-06| + + While the basic approach above works in this case, txtai has a more robust pipeline to handle citations and references. + The RAG pipeline is defined below. A RAG pipeline works in the same way as a LLM + Vector Search pipeline, except it has special logic for generating citations. + This pipeline takes the answers and compares it to the context passed to the LLM to determine the most likely reference." + + The function prints the generated answer and the corresponding citation. + """ + # Documentation context to guide the answer + docs_context = ( + "As we discussed before, this is important when dealing with large volumes of data. Not all of the data can be added to a LLM prompt. 
" + "Additionally, having only the most relevant context helps the LLM generate higher quality answers.\n\n" + "Citations for LLMs:\n" + "A healthy level of skepticism should be applied to answers generated by AI. We're far from the day where we can blindly trust answers from an AI model.\n\n" + "txtai has a couple approaches for generating citations. The basic approach is to take the answer and search the vector database for the closest match.\n\n" + "for x in embeddings.search(result):\n print(x['text'])\n\n" + "E5-base-v2\n" + "Image Captions BLIP\n" + "Labels - Zero Shot BART-Large-MNLI\n\n" + "Model Guide\n" + "|Component |Model(s)|Date Added|\n" + "|---|---|---|\n" + "|Embeddings |all-MiniLM-L6-v2|2022-04-15|\n" + "|Image Captions |BLIP|2022-03-17|\n" + "|Labels - Zero Shot |BART-Large-MNLI|2022-01-01|\n" + "|Large Language Model (LLM) |Mistral 7B OpenOrca|2023-10-01|\n" + "|Summarization |DistilBART|2021-02-22|\n" + "|Text-to-Speech |ESPnet JETS|2022-08-01|\n" + "|Transcription |Whisper|2022-08-01|\n" + "|Translation |OPUS Model Series|2021-04-06|\n\n" + "While the basic approach above works in this case, txtai has a more robust pipeline to handle citations and references.\n\n" + "The RAG pipeline is defined below. A RAG pipeline works in the same way as a LLM + Vector Search pipeline, except it has special logic for generating citations. " + "This pipeline takes the answers and compares it to the context passed to the LLM to determine the most likely reference." + ) + + # Create a prompt that injects the documentation context + def prompt_with_context(question: str): + return [{ + "query": question, + "question": f""" +Answer the following question using only the context below. Only include information specifically discussed. + +question: {question} +context: +{docs_context} +""" + }] + + # Create the LLM instance with a system prompt template. + llm = LLM("TheBloke/Mistral-7B-OpenOrca-AWQ") + + # Create the RAG instance using txtai; the output mode "reference" will provide a reference id. + rag = RAG(embeddings, llm, output="reference") + + # Query the RAG pipeline with the prompt that includes the docs context. + result = rag(prompt_with_context(query), maxlength=4096, pad_token_id=32000)[0] + + console.print("ANSWER:", style="bold cyan") + console.print(result["answer"], style="white") + + # Retrieve and print citation text using the reference from the result. + citation = embeddings.search( + "select id, text from txtai where id = :id", + limit=1, + parameters={"id": result["reference"]} + ) + console.print("CITATION:", style="bold cyan") + console.print(citation, style="white") + +# Example usage: +if __name__ == "__main__": + # Test the function with a sample recommendation query. + test_query = "recommend how can I optimize model performance based on the docs provided?" 
+ recommend_based_on_docs(test_query) diff --git a/solo_server/commands/solodocs/Bug Report Template.docx b/solo_server/commands/solodocs/Bug Report Template.docx new file mode 100644 index 0000000000000000000000000000000000000000..54ba636111ccc27b942c2c28fd63b091892700aa GIT binary patch literal 9123 zcmaJ{1C$-zvOZ&58QZpPW{gZGGa1{;*tTukw#{Uiv2E*3a__r;?*HDs)obl@x>r~4 z-L-4i{;Ix~mj(ev1wcYV0;aqTKLP#_Vw2U(M3-RS{oofn8&K$=U4Y`-8v8;K&zn5s31qaSq;RlF>B++;T&^t zeYKYw$W4N6DhPl|&)+_}a^N5a*%M@r61$4SX(k@+?{&-E7b@oraQF3^qElcYZN6g* zCK9)+=n49#?dEuad) zUG*}}*eXhcC5{_IRc%j47-tX{OPvTyG!fZjw~ShWz}}Z~#RkmOHd5%c#DrYgHL3wa z&XIB(#|6U-lxI~Hi1+bZabr$wXeFfJ4vabMB7~rwfHsIez7LE2=pxKME{ma|206EO zbdndWU_F_G)61fbn?YhE4vm$`;+wIRG4%4%OEtpDCU`{VDTs_EVD<@6s|nZ%^a!J% zpy%{i8BAuwQJD0#1Rz)*N%h$El~6i=GOg%3k$;Ux`66u*oYMD|0E8D#ADOlTNM0HO z($9JlGZzQ|NC5``5woQGX;moYF_38Fr!iH3a?q=hH z6H&XJYao3|uK})b5}D`Qcz`!TI(&gz!s`zY>bX%m`pV2S+6WPBDpB~Dzz;M*0?0}V zyX$v@cRMKRA7H=&WT0bwQ5e@9o_6wwVXNoTnP9{b$+jpU;_InGiF~PgvcE-zKpN5H z?M}6n(-FhpWUES(H2Wl+Qs&;G=G(LZ@rvzc%@G|TNy|kih@sE5>eoQ6gZx^0|3}?2`Bw{xQW|}u3B`p$W zbEs*A6C}w)=#9tdEYsX$FlLPcU;KfggQG$~_!t7C_2Z{dEPVYj|F24PGszy)?E#PW zt}qnH-SrwJeUF>?9k`;k2{8W{svl;#faxlQ~upe5sM=nW=;d4Z+R+x5^9yj78WK(X6%TBiNH=R+F_rt>T2yG1zmCKRi;#P8{n5* zzOxA~JC}1&=3t^hBnDJnyAWc=!gceDKZSqaBX$Eee|vW~2uKm$dL;p106-k_zq#MP zB8;JxrM>c3U2}uq{ueK8x%`g^1Nn0~F8sTM|BzsG)=FI|3q(>>oJbNqEG2b^JEbPV zJUp<5mgMI7o1w%j79UTCS^P1Wi;1k9>S!EE55s6arQPM@-r!~i37^+SRv8i}C*()G zMs2*5vMREBl9BL9mQhJGpHYgNGGwgh8LqFoRaHfVeIIFx!GHd+>@IUe+)hTUO!dYWL| z%Ou?f2+NjU@6>WOqR57D>x(mMew5OFqMLA6uLBx`9{=QxoSf|i7-5M}xG&)zfjZa+ z7q1jy`GgZtPvF|X?yAcs1Sgm{v$8W`5!M{C^d}%4%u`^Ye`w2u% z+X|$t5|FwKxSI!%t?n@zD`(h|bcWYra@3qb%mRR(CmQ{%;+(Y3Iar>7=Vc#+<=ial zX-(4y0$`69qJ|qx{V(>&cfcpfsKS zg#?Qj!wL_JDjG(v7%!fQ8ljhvoabfY&zL_AP^qqTI2K|F<&xRX@+Wae9&YW}koQ_> ztn~tNnS0?pRq2QGxm8Rqg0_Vuk`gFF76?Fu3}vI}fN1eAN!oI#w6%=Vg*5{@$d=d< zrRKiH+PRDah(jzrI@DGA7(C0L&Jxxw!Q)KQf!xyySYzs}f{W}MeCYj=;Q${&zXkff z%J&5My?~pe1I`1nF$%L-RW!5QDV%Jtb1zsa)@C7?L~aEfEw7hFeetKDnR4<>LOeyL zX+GWV!OX7at^n)%UCcD2eWU4v48Anufm0A!>Q1zr&AJ=%m?k zk6>(fCpdelqSIcMCuQ7n4|gx)k=Kf~AS%a}iv>S!u%W`D%_l00_`0_GdUH8CcMwmZ zh5UjX*pA%9H?~cWWg?LRGFrk$wN9_#n;$=h z@V`iGK3#Qe;s@g{bS5m!!dDks5!@c^ z7T*C?f66iUSfPRtq1^cl&vX=JdrL)`StIXIqT$eHCP@BDAi8!w3&yT^Tun%Z%RVeQ znYtYu>GiC;s-FM2=M7gR6f>#K&x84fZ|(%a;?cvepocqxbQhL}XA=qefNe{()#6#` z%6qm%C!AsR4BS76{i*nhSf~J1dNLBpIX$hvX)w6?M;<~hoEURrLKs1ARo4=i`J^y##fOH4vC3v z=UkVG^@RkH0l-r3ZjY`y-o>=*Q>zp&_3|ZU5=2(P=>+7FC>CIn4vft}gkJgLWp!Gz zcv-UWdAh{gG6Cl&qdtYp)vgc_5MeSi{Cs+{~X-z)7Kj(DrL&a&@hyN1FH_&pIPJgX{WVgNI}8iu0H`d!XZkCoMx7^ z-d@Y}o`Etr?9{Z6VrR#7%RIiaKO_PvBJK$xJU@h)lz0m34DTUODJ1@EX)s0%_gVlK2@=cX8rwJcO-E@hTDfy^uT|vIIV*H^y~U+dR1Fo*&Z1mAJ}| z5mbqFiaNe0^ca=Tz&2?rp*5{g(&zc|TWCm^EDyjZi&Y3)@(4c7BwqBrjt@B_j4S**JG~( z2z7MADdx-K>%NX*25L{+f=j_(G+xX(l-XFDi;HV1g@ajgX)gd1y&JT4GZ3Lo4tNu} z=u0+KGb)oT5b(tk+#05qKR-Q*Op&iUrgCyulbPy6`2Y(*`T+E8c&xPTW6~TaB8W*q z0Wu_8Oqw*^aC3fMgLP4sodG7YO_uY?MXo!%h#*<~v}*(3!2Ae1KR~9nzV}*#kA!#7 z##SK`v!V5*d?PN3UrBxR21J5FlG&v+b3@Q2{`?f4<;5>F*bN$ChquZNoRFy zExvE*iW6&3yVAi7?XFL2h!+oK4fpnH1A9@m`!Nzo6e1Ay{>H8bK^@1f_!Dy+#3&T1 zlT=|pbSL;bN31;}VsDJ1rjTN(L>lntB2GNAmLZtrimI<*fk_dKFHuqq2z+piaeu&v z_zrEyLD0TUnhjnbU4Fy`byI}o>k|xl4PDB%#-jveqnOxx!JgHb4W4fS2|^8C6lqPa z60t-CXMA06@mNo;2TNnxn9sDn4Wut5EDevsG%&*IcC{?wT zh)%#2s2&vceG=*7b68SL4_E+>cXk;4X}L@$h9|VGyN4wi&Pb(}dj8S3-cOyC6@5e6+?3 z552wVtp@U%ZAXQ3hHPD9|ACf{_U#JE8dkL$2jAD5!u#3z_9?$0XuAl4jwvB$?RkrS&dgMwJzFG)Oli9b0u@u`MULpGkqwQjc^MP5mwlk{I z?f7j#$P)F?NxA3|Y)`;$;U^JgxO^=a_xJ?*LrSoE1nYr4TZ2_@NxaGrEHSfjp#}C` 
zorWM4-mJ0aI3v;DuH#Z42?{$*ty~D z)QBcz=w2K(9ixtYVG13MLZY6-qB)y8SP{l+AO zpK|@ASz38jFjn2x96)sxkVAt(>{3C58bBG3%B%|MM| zl*zA!58f^Hb$xK|D-TSO&8+qg-`YH|Ui+j5t!O(3fJsDWR%tsH;n8U@dI{~YD4Ns} z6OUrbJu)@6Kv~Iv2$HhsX_X;c#(G0VN*%x|%UyIfq3qPFP>KK#)-PI(_ZzF>F7Oc8 zWk1&~U6yiJ9Ni)g6kKobH3Oux8M_gGfl=pblbIUz74i$$)=hXZ2>ODDnxn?EV2zna z{~!*Z5b0uYpKh;CUMR8ha=VD|3&k*3A~e#x@9VUnzl;q8j0*VcV~RgH_+M1#zsvvq zwBqmjf0C%bisgUF)_ae?_5bAU|E~Tg0r&6S>%I^6xB9>Fu7B76vx@(Vru$0_-o5J| z-T#ZU`@8a=!1&+Ct^@r~;{&NU_M{0k6#GlUbYrXv?tN8!#>XVlSdp9ir0QG(dz0>E?gul-I E2gmPX7XSbN literal 0 HcmV?d00001 From 4a90b8b2042d7b0a52219768e9e0ebe0f27af905 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 17:07:06 -0800 Subject: [PATCH 02/10] updates with rich boxes and yes no setup --- solo_server/main.py | 327 +++++++++++++++++++------------------------- 1 file changed, 143 insertions(+), 184 deletions(-) diff --git a/solo_server/main.py b/solo_server/main.py index 3c7f76b..e5786c7 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -1,197 +1,156 @@ -import os -import json +import time import typer -import subprocess -import shutil import click -import sys - -from enum import Enum -from solo_server.config import CONFIG_PATH -from solo_server.utils.docker_utils import start_docker_engine -from solo_server.utils.hardware import detect_hardware, display_hardware_info, recommended_server -from solo_server.utils.nvidia import check_nvidia_toolkit, install_nvidia_toolkit_linux, install_nvidia_toolkit_windows -from solo_server.simple_setup import run_command, detect_gpu from rich.console import Console from rich.panel import Panel +from rich.theme import Theme +from rich import box +from litgpt import LLM # Requires: pip install 'litgpt[all]' + +app = typer.Typer() + +# Define a custom neon blue theme +solo_theme = Theme({ + "info": "bold bright_blue", + "warning": "bold magenta", + "success": "bold bright_blue", + "panel.border": "bright_blue", + "panel.title": "bright_cyan" +}) +console = Console(theme=solo_theme) -class ServerType(str, Enum): - OLLAMA = "Ollama" - VLLM = "vLLM" - LLAMACPP = "Llama.cpp" +def detect_hardware(): + """ + Dummy hardware detection function. + Replace with your actual hardware detection logic. 
+ """ + cpu_model = "Intel i7" + cpu_cores = 8 + memory_gb = 16 # Example value + gpu_memory = 4 # Example value (in GB) + return cpu_model, cpu_cores, memory_gb, gpu_memory +def get_hardware_category(memory_gb: float) -> str: + if memory_gb < 8: + return "Fresh Adopter" + elif memory_gb < 16: + return "Mid Range" + elif memory_gb < 32: + return "High Performance" + else: + return "Maestro" + +@app.command() def setup(): - """Interactive setup for Solo Server environment""" - # Display hardware info - display_hardware_info(typer) - cpu_model, cpu_cores, memory_gb, gpu_vendor, gpu_model, gpu_memory, compute_backend, os_name = detect_hardware() + console.print("\n") - typer.echo("\nStarting Solo Server Setup...\n") - gpu = detect_gpu() - if gpu: - print("💻 Solo Sighting: GPU detected ->", gpu) - device_arg = "1" - else: - print("😎 Solo Mode: No GPU found; rocking CPU mode!") - device_arg = "0" + # Step 1: Hardware Detection & Categorization + typer.echo("Detecting hardware...") + cpu_model, cpu_cores, memory_gb, gpu_memory = detect_hardware() + hardware_category = get_hardware_category(memory_gb) + hardware_info = ( + f"CPU: {cpu_model} ({cpu_cores} cores)\n" + f"Memory: {memory_gb} GB\n" + f"GPU Memory: {gpu_memory} GB\n" + f"Category: {hardware_category}" + ) + console.print( + Panel(hardware_info, title="Hardware Info", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) - # Ask for installation type - install_type = typer.prompt("Choose installation type:", type=click.Choice(['simple', 'advanced'], case_sensitive=False)) - typer.echo(f"Selected installation type: {install_type}") - - if install_type == "simple": - # Define port to use - port = "5070" - device_arg = "0" - accelerator_arg = "cpu" - - console = Console() - console.print("Solo setup: Installing optimal inference engine, hold tight...") - run_command(["litgpt", "download", "HuggingFaceTB/SmolLM2-135M-Instruct"], - spinner_message="Solo download in progress: Grabbing lightest model...") - console.print("\n") - - - console.print(Panel.fit( - f"🎉 LIVE: solo server is now live!\n" - f"🔗 Swagger docs available at: http://localhost:{port}/docs", - title="Solo Server", border_style="blue")) + # Step 2: Core Initialization Prompt + init_prompt = ( + "Continue to solo core initialization?\n" + "Yes: Proceed with full initialization and model setup\n" + "No: Exit setup" + ) + console.print( + Panel(init_prompt, title="Core Initialization", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + if not typer.confirm("", default=True): + typer.echo("Exiting setup.") + raise typer.Exit() + + console.print("\n") + + # Step 3: Model Selection & Download Simulation + model_map = { + "Fresh Adopter": "SmolLM2-135M", + "Mid Range": "Qwen2.5-0.5B", + "High Performance": "microsoft/phi-2", + "Maestro": "Deepseek-r1" + } + selected_model = model_map.get(hardware_category, "SmolLM2-135M") + with console.status(f"Downloading model {selected_model}...", spinner="dots", spinner_style="bold bright_blue"): + time.sleep(3) # Simulate download delay + typer.echo(f"Model {selected_model} download complete.") + + console.print("\n") + + # Step 4: Advanced Modules Prompt + adv_prompt = ( + "Load advanced modules?\n" + "Yes: Load additional functionalities and module packs\n" + "No: Skip advanced modules" + ) + console.print( + Panel(adv_prompt, title="Advanced Modules", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + if typer.confirm("", default=True): + module_pack_info = ( + "Choose module pack:\n" + "pro - Pro 
Pack: RAG, OCR, and Voice Models\n" + "industrial - Industrial Pack: CV, Search, and Video Models\n" + "robotics - Robotics Pack: ROS, OpenEMMA, and Advanced Models\n" + "custom ensemble - Custom Ensemble: Paid option for tailored modules\n" + "Enter your choice:" + ) console.print( - f"curl -X POST http://127.0.0.1:{port}/predict -H 'Content-Type: application/json' -d '{{\"prompt\": \"hello Solo\"}}'") - - command = [ - "litgpt", - "serve", - "HuggingFaceTB/SmolLM2-135M-Instruct", - "--port", port, - "--devices", device_arg, - "--accelerator", accelerator_arg - ] - - process = subprocess.Popen(command) - print(f"Command is running in the background with PID: {process.pid}") - else: - # Original code - recmd_server = recommended_server(memory_gb, gpu_vendor, gpu_memory) - - def server_type_prompt(value: str) -> ServerType: - normalized_value = value.lower() - for server in ServerType: - if server.value.lower() == normalized_value: - return server - raise typer.BadParameter(f"Invalid server type: {value}") - - server_choice = typer.prompt( - "\nChoose server", - type=server_type_prompt, - default=recmd_server, + Panel(module_pack_info, title="Module Pack Options", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) ) - - # GPU Configuration - use_gpu = False - if gpu_vendor in ["NVIDIA", "AMD", "Intel", "Apple Silicon"]: - use_gpu = True - if use_gpu and gpu_vendor == "NVIDIA": - if not check_nvidia_toolkit(os_name): - if typer.confirm("NVIDIA GPU Detected, but GPU drivers not found. Install now?", default=True): - if os_name == "Linux": - try: - install_nvidia_toolkit_linux() - except subprocess.CalledProcessError as e: - typer.echo(f"Failed to install NVIDIA toolkit: {e}", err=True) - use_gpu = False - elif os_name == "Windows": - try: - install_nvidia_toolkit_windows() - except subprocess.CalledProcessError as e: - typer.echo(f"Failed to install NVIDIA toolkit: {e}", err=True) - use_gpu = False - else: - typer.echo("Falling back to CPU inference.") - use_gpu = False - - # Save GPU configuration to config file - config = {} - if os.path.exists(CONFIG_PATH): - with open(CONFIG_PATH, 'r') as f: - config = json.load(f) - config['hardware'] = {'use_gpu': use_gpu} - with open(CONFIG_PATH, 'w') as f: - json.dump(config, f, indent=4) - - # Docker Engine Check for Docker-based servers - if server_choice in [ServerType.OLLAMA, ServerType.VLLM]: - # Check Docker installation - docker_path = shutil.which("docker") - if not docker_path: - typer.echo("Docker is not installed or not in the system PATH. Please install Docker first.\n", err=True) - typer.secho("Install Here: https://docs.docker.com/get-docker/", fg=typer.colors.GREEN) - raise typer.Exit(code=1) - - - try: - subprocess.run(["docker", "info"], check=True, capture_output=True, timeout=20) - except subprocess.CalledProcessError: - typer.echo("Docker daemon is not running. 
Attempting to start Docker...", err=True) - if not start_docker_engine(os_name): - raise typer.Exit(code=1) - # Re-check if Docker is running - try: - subprocess.run(["docker", "info"], check=True, capture_output=True, timeout=20) - except subprocess.CalledProcessError: - typer.echo("Try restarting the terminal with admin privileges and close any instances of podman.", err=True) - raise typer.Exit(code=1) - - - - # Server setup - try: - if server_choice == ServerType.VLLM: - # pull the appropriate vLLM image - typer.echo("Pulling vLLM image...") - if gpu_vendor == "NVIDIA" and use_gpu: - subprocess.run(["docker", "pull", "vllm/vllm-openai:latest"], check=True) - elif gpu_vendor == "AMD" and use_gpu: - subprocess.run(["docker", "pull", "rocm/vllm"], check=True) - elif cpu_model and "Apple" in cpu_model: - subprocess.run(["docker", "pull", "getsolo/vllm-arm"], check=True) - elif cpu_model and any(vendor in cpu_model for vendor in ["Intel", "AMD"]): - subprocess.run(["docker", "pull", "getsolo/vllm-cpu"], check=True) - else: - typer.echo("vLLM currently does not support your machine", err=True) - return False - - typer.secho( - "Solo server vLLM setup complete! Use 'solo serve -s vllm -m MODEL_NAME' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - - elif server_choice == ServerType.OLLAMA: - # Just pull the Ollama image - typer.echo("Pulling Ollama image...") - if gpu_vendor == "AMD" and use_gpu: - subprocess.run(["docker", "pull", "ollama/ollama-rocm"], check=True) - else: - subprocess.run(["docker", "pull", "ollama/ollama"], check=True) - - typer.secho( - "Solo server ollama setup complete! \nUse 'solo serve -s ollama -m MODEL_NAME' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - - elif server_choice == ServerType.LLAMACPP: - from solo_server.utils.server_utils import setup_llama_cpp_server - setup_success = setup_llama_cpp_server(use_gpu, gpu_vendor, os_name, install_only=True) - if setup_success: - typer.secho( - "Solo server llama.cpp setup complete! 
Use 'solo serve -s llama.cpp -m MODEL_PATH' to start the server.", - fg=typer.colors.BRIGHT_GREEN - ) - else: - typer.echo("Failed to setup llama.cpp", err=True) - except Exception as e: - typer.echo(f"\nSetup failed: {e}", err=True) - raise typer.Exit(code=1) + module_pack = typer.prompt("", type=click.Choice(["pro", "industrial", "robotics", "custom ensemble"], case_sensitive=False), default="pro") + typer.echo(f"Module pack selected: {module_pack}") + else: + typer.echo("Skipping advanced modules.") + + console.print("\n") + console.print( + Panel("Solo core initialization complete!", title="Setup Complete", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + console.print("\n") + + # Step 5: Load the LLM using litgpt + console.print( + Panel(f"Loading LLM model: {selected_model}", title="LLM Load", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + try: + llm = LLM.load(selected_model) + typer.echo("LLM loaded successfully.") + except Exception as e: + typer.echo(f"Failed to load LLM: {e}") + raise typer.Exit() + + # Step 6: Start the server on port 5070 + console.print( + Panel(f"Starting server on port 5070 with model: {selected_model}", title="Server", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + try: + llm.serve(port=5070) + except Exception as e: + typer.echo(f"Failed to start server: {e}") + + # Step 7: Optionally Generate Text + prompt_text = typer.prompt( + "Enter a prompt to generate text (default: 'Fix the spelling: Every fall, the familly goes to the mountains.')", + default="Fix the spelling: Every fall, the familly goes to the mountains." + ) + typer.echo("Generating text...") + try: + generated_text = llm.generate(prompt_text) + typer.echo("\nGenerated text:") + typer.echo(generated_text) + except Exception as e: + typer.echo(f"Failed to generate text: {e}") if __name__ == "__main__": - typer.run(setup) \ No newline at end of file + app() From d5ed1687296a85cfc25be8234f51eb5a57ac66b9 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 17:19:17 -0800 Subject: [PATCH 03/10] add docker management steps --- solo_server/ensemble.yaml | 9 +++ solo_server/main.py | 113 +++++++++++++++++++++++++++++++++++--- 2 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 solo_server/ensemble.yaml diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml new file mode 100644 index 0000000..61f0516 --- /dev/null +++ b/solo_server/ensemble.yaml @@ -0,0 +1,9 @@ +advanced_modules: true +hardware: + category: High Performance + cpu_cores: 8 + cpu_model: Intel i7 + gpu_memory: 4 + memory_gb: 16 +module_pack: pro +selected_model: microsoft/phi-2 diff --git a/solo_server/main.py b/solo_server/main.py index e5786c7..72c73b4 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -1,6 +1,8 @@ import time +import subprocess import typer import click +import yaml from rich.console import Console from rich.panel import Panel from rich.theme import Theme @@ -40,6 +42,73 @@ def get_hardware_category(memory_gb: float) -> str: else: return "Maestro" +def build_docker_ensemble(module_pack: str): + """ + Build an ensemble of Docker images for the selected module pack. + The Dockerfiles are organized in subfolders within the "containers" folder. + + Adjust this dictionary to match the folders in your "containers/" directory + and how you want them grouped by module pack. 
+ """ + docker_modules = { + # Example grouping (adjust as needed): + "pro": [ + "rag", + "langchain", + "Transformers" + ], + "industrial": [ + "PyTorch", + "Tensorflow", + "vLLM" + ], + "robotics": [ + "ROS", + "LeRobot", + "OpenEMMA" + ], + # You can place additional folders here for a "custom ensemble" + "custom ensemble": [ + "Browser Use", + "Computer Use", + "Cosmos", + "homeassistant-core", + "JAX", + "LITA", + "llama-index" + ] + } + + modules = docker_modules.get(module_pack.lower(), []) + if not modules: + console.print(f"[magenta]No modules found for the '{module_pack}' pack. Adjust your dictionary as needed.[/magenta]") + return + + for module in modules: + console.print(f"[bright_blue]Building Docker image for module:[/bright_blue] {module}") + + # Replace spaces in the module name when creating the image tag + image_tag = module.lower().replace(' ', '-') + + # If your folder name has spaces, you may need to quote or escape them. + # Here we assume your OS can handle the direct string (Linux usually can with a directory rename). + build_path = f"./containers/{module}" + + try: + subprocess.run( + [ + "docker", + "build", + "-t", f"ensemble/{image_tag}", + build_path + ], + check=True, + capture_output=True + ) + console.print(f"[bright_cyan]Successfully built image for:[/bright_cyan] {module}") + except subprocess.CalledProcessError as e: + console.print(f"[warning]Docker build failed for module {module}: {e}[/warning]") + @app.command() def setup(): console.print("\n") @@ -96,13 +165,15 @@ def setup(): console.print( Panel(adv_prompt, title="Advanced Modules", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) ) - if typer.confirm("", default=True): + advanced_modules = typer.confirm("", default=True) + module_pack = None + if advanced_modules: module_pack_info = ( "Choose module pack:\n" - "pro - Pro Pack: RAG, OCR, and Voice Models\n" - "industrial - Industrial Pack: CV, Search, and Video Models\n" - "robotics - Robotics Pack: ROS, OpenEMMA, and Advanced Models\n" - "custom ensemble - Custom Ensemble: Paid option for tailored modules\n" + "pro - Pro Pack: RAG, LangChain, Transformers\n" + "industrial - Industrial Pack: PyTorch, Tensorflow, vLLM\n" + "robotics - Robotics Pack: ROS, LeRobot, OpenEMMA\n" + "custom ensemble - Custom Ensemble: A variety of additional containers\n" "Enter your choice:" ) console.print( @@ -113,13 +184,39 @@ def setup(): else: typer.echo("Skipping advanced modules.") + console.print("\n") + + # Step 5: Save Setup Information to ensemble.yaml + setup_info = { + "hardware": { + "cpu_model": cpu_model, + "cpu_cores": cpu_cores, + "memory_gb": memory_gb, + "gpu_memory": gpu_memory, + "category": hardware_category + }, + "selected_model": selected_model, + "advanced_modules": advanced_modules, + "module_pack": module_pack + } + with open("ensemble.yaml", "w") as f: + yaml.dump(setup_info, f) + typer.echo("Setup information saved to ensemble.yaml.") + + # Step 6: If advanced modules enabled, start Docker ensemble builds + if advanced_modules and module_pack: + console.print( + Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + ) + build_docker_ensemble(module_pack) + console.print("\n") console.print( Panel("Solo core initialization complete!", title="Setup Complete", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) ) console.print("\n") - # Step 5: Load the LLM using litgpt + # Step 7: Load the LLM using litgpt console.print( 
Panel(f"Loading LLM model: {selected_model}", title="LLM Load", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) ) @@ -130,7 +227,7 @@ def setup(): typer.echo(f"Failed to load LLM: {e}") raise typer.Exit() - # Step 6: Start the server on port 5070 + # Step 8: Start the server on port 5070 console.print( Panel(f"Starting server on port 5070 with model: {selected_model}", title="Server", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) ) @@ -139,7 +236,7 @@ def setup(): except Exception as e: typer.echo(f"Failed to start server: {e}") - # Step 7: Optionally Generate Text + # Step 9: Optionally Generate Text prompt_text = typer.prompt( "Enter a prompt to generate text (default: 'Fix the spelling: Every fall, the familly goes to the mountains.')", default="Fix the spelling: Every fall, the familly goes to the mountains." From 95fe815b8ce4462095370835a1799b21d21ef8fa Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 17:29:52 -0800 Subject: [PATCH 04/10] pr clean up --- solo_server/commands/recommend.py | 140 ------------------ .../solodocs/Bug Report Template.docx | Bin 9123 -> 0 bytes 2 files changed, 140 deletions(-) delete mode 100644 solo_server/commands/recommend.py delete mode 100644 solo_server/commands/solodocs/Bug Report Template.docx diff --git a/solo_server/commands/recommend.py b/solo_server/commands/recommend.py deleted file mode 100644 index 19d4eec..0000000 --- a/solo_server/commands/recommend.py +++ /dev/null @@ -1,140 +0,0 @@ -from txtai import RAG -from litgpt import LLM -from rich.console import Console -import os - -import nltk -nltk.download(['punkt', 'punkt_tab']) - -from txtai.pipeline import Textractor - -from txtai import Embeddings - - -# Create Textractor -textractor = Textractor() -text = textractor("solodocs/solo-server/solo_server/commands/solodocs/Bug Report Template.docx") -print(text) - - -def stream(path): - for f in sorted(os.listdir(path)): - fpath = os.path.join(path, f) - - # Only accept documents - if f.endswith(("docx", "xlsx", "pdf")): - print(f"Indexing {fpath}") - for paragraph in textractor(fpath): - yield paragraph - -# Document text extraction, split into paragraphs - -# Vector Database -embeddings = Embeddings(content=True) -embeddings.index(stream("solodocs")) - -console = Console() - -def recommend_based_on_docs(query: str): - """ - Generate a recommendation answer (with citations) based on the documentation context using a RAG pipeline. - - The function uses the following documentation context: - - "As we discussed before, this is important when dealing with large volumes of data. Not all of the data can be added to a LLM prompt. Additionally, having only the most relevant context helps the LLM generate higher quality answers. - - Citations for LLMs - A healthy level of skepticism should be applied to answers generated by AI. We're far from the day where we can blindly trust answers from an AI model. - - txtai has a couple approaches for generating citations. The basic approach is to take the answer and search the vector database for the closest match. 
- - for x in embeddings.search(result): - print(x['text']) - - E5-base-v2 - Image Captions BLIP - Labels - Zero Shot BART-Large-MNLI - Model Guide - |Component |Model(s)|Date Added| - |---|---|---| - |Embeddings |all-MiniLM-L6-v2|2022-04-15| - |Image Captions |BLIP|2022-03-17| - |Labels - Zero Shot |BART-Large-MNLI|2022-01-01| - |Large Language Model (LLM) |Mistral 7B OpenOrca|2023-10-01| - |Summarization |DistilBART|2021-02-22| - |Text-to-Speech |ESPnet JETS|2022-08-01| - |Transcription |Whisper|2022-08-01| - |Translation |OPUS Model Series|2021-04-06| - - While the basic approach above works in this case, txtai has a more robust pipeline to handle citations and references. - The RAG pipeline is defined below. A RAG pipeline works in the same way as a LLM + Vector Search pipeline, except it has special logic for generating citations. - This pipeline takes the answers and compares it to the context passed to the LLM to determine the most likely reference." - - The function prints the generated answer and the corresponding citation. - """ - # Documentation context to guide the answer - docs_context = ( - "As we discussed before, this is important when dealing with large volumes of data. Not all of the data can be added to a LLM prompt. " - "Additionally, having only the most relevant context helps the LLM generate higher quality answers.\n\n" - "Citations for LLMs:\n" - "A healthy level of skepticism should be applied to answers generated by AI. We're far from the day where we can blindly trust answers from an AI model.\n\n" - "txtai has a couple approaches for generating citations. The basic approach is to take the answer and search the vector database for the closest match.\n\n" - "for x in embeddings.search(result):\n print(x['text'])\n\n" - "E5-base-v2\n" - "Image Captions BLIP\n" - "Labels - Zero Shot BART-Large-MNLI\n\n" - "Model Guide\n" - "|Component |Model(s)|Date Added|\n" - "|---|---|---|\n" - "|Embeddings |all-MiniLM-L6-v2|2022-04-15|\n" - "|Image Captions |BLIP|2022-03-17|\n" - "|Labels - Zero Shot |BART-Large-MNLI|2022-01-01|\n" - "|Large Language Model (LLM) |Mistral 7B OpenOrca|2023-10-01|\n" - "|Summarization |DistilBART|2021-02-22|\n" - "|Text-to-Speech |ESPnet JETS|2022-08-01|\n" - "|Transcription |Whisper|2022-08-01|\n" - "|Translation |OPUS Model Series|2021-04-06|\n\n" - "While the basic approach above works in this case, txtai has a more robust pipeline to handle citations and references.\n\n" - "The RAG pipeline is defined below. A RAG pipeline works in the same way as a LLM + Vector Search pipeline, except it has special logic for generating citations. " - "This pipeline takes the answers and compares it to the context passed to the LLM to determine the most likely reference." - ) - - # Create a prompt that injects the documentation context - def prompt_with_context(question: str): - return [{ - "query": question, - "question": f""" -Answer the following question using only the context below. Only include information specifically discussed. - -question: {question} -context: -{docs_context} -""" - }] - - # Create the LLM instance with a system prompt template. - llm = LLM("TheBloke/Mistral-7B-OpenOrca-AWQ") - - # Create the RAG instance using txtai; the output mode "reference" will provide a reference id. - rag = RAG(embeddings, llm, output="reference") - - # Query the RAG pipeline with the prompt that includes the docs context. 
- result = rag(prompt_with_context(query), maxlength=4096, pad_token_id=32000)[0] - - console.print("ANSWER:", style="bold cyan") - console.print(result["answer"], style="white") - - # Retrieve and print citation text using the reference from the result. - citation = embeddings.search( - "select id, text from txtai where id = :id", - limit=1, - parameters={"id": result["reference"]} - ) - console.print("CITATION:", style="bold cyan") - console.print(citation, style="white") - -# Example usage: -if __name__ == "__main__": - # Test the function with a sample recommendation query. - test_query = "recommend how can I optimize model performance based on the docs provided?" - recommend_based_on_docs(test_query) diff --git a/solo_server/commands/solodocs/Bug Report Template.docx b/solo_server/commands/solodocs/Bug Report Template.docx deleted file mode 100644 index 54ba636111ccc27b942c2c28fd63b091892700aa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9123 zcmaJ{1C$-zvOZ&58QZpPW{gZGGa1{;*tTukw#{Uiv2E*3a__r;?*HDs)obl@x>r~4 z-L-4i{;Ix~mj(ev1wcYV0;aqTKLP#_Vw2U(M3-RS{oofn8&K$=U4Y`-8v8;K&zn5s31qaSq;RlF>B++;T&^t zeYKYw$W4N6DhPl|&)+_}a^N5a*%M@r61$4SX(k@+?{&-E7b@oraQF3^qElcYZN6g* zCK9)+=n49#?dEuad) zUG*}}*eXhcC5{_IRc%j47-tX{OPvTyG!fZjw~ShWz}}Z~#RkmOHd5%c#DrYgHL3wa z&XIB(#|6U-lxI~Hi1+bZabr$wXeFfJ4vabMB7~rwfHsIez7LE2=pxKME{ma|206EO zbdndWU_F_G)61fbn?YhE4vm$`;+wIRG4%4%OEtpDCU`{VDTs_EVD<@6s|nZ%^a!J% zpy%{i8BAuwQJD0#1Rz)*N%h$El~6i=GOg%3k$;Ux`66u*oYMD|0E8D#ADOlTNM0HO z($9JlGZzQ|NC5``5woQGX;moYF_38Fr!iH3a?q=hH z6H&XJYao3|uK})b5}D`Qcz`!TI(&gz!s`zY>bX%m`pV2S+6WPBDpB~Dzz;M*0?0}V zyX$v@cRMKRA7H=&WT0bwQ5e@9o_6wwVXNoTnP9{b$+jpU;_InGiF~PgvcE-zKpN5H z?M}6n(-FhpWUES(H2Wl+Qs&;G=G(LZ@rvzc%@G|TNy|kih@sE5>eoQ6gZx^0|3}?2`Bw{xQW|}u3B`p$W zbEs*A6C}w)=#9tdEYsX$FlLPcU;KfggQG$~_!t7C_2Z{dEPVYj|F24PGszy)?E#PW zt}qnH-SrwJeUF>?9k`;k2{8W{svl;#faxlQ~upe5sM=nW=;d4Z+R+x5^9yj78WK(X6%TBiNH=R+F_rt>T2yG1zmCKRi;#P8{n5* zzOxA~JC}1&=3t^hBnDJnyAWc=!gceDKZSqaBX$Eee|vW~2uKm$dL;p106-k_zq#MP zB8;JxrM>c3U2}uq{ueK8x%`g^1Nn0~F8sTM|BzsG)=FI|3q(>>oJbNqEG2b^JEbPV zJUp<5mgMI7o1w%j79UTCS^P1Wi;1k9>S!EE55s6arQPM@-r!~i37^+SRv8i}C*()G zMs2*5vMREBl9BL9mQhJGpHYgNGGwgh8LqFoRaHfVeIIFx!GHd+>@IUe+)hTUO!dYWL| z%Ou?f2+NjU@6>WOqR57D>x(mMew5OFqMLA6uLBx`9{=QxoSf|i7-5M}xG&)zfjZa+ z7q1jy`GgZtPvF|X?yAcs1Sgm{v$8W`5!M{C^d}%4%u`^Ye`w2u% z+X|$t5|FwKxSI!%t?n@zD`(h|bcWYra@3qb%mRR(CmQ{%;+(Y3Iar>7=Vc#+<=ial zX-(4y0$`69qJ|qx{V(>&cfcpfsKS zg#?Qj!wL_JDjG(v7%!fQ8ljhvoabfY&zL_AP^qqTI2K|F<&xRX@+Wae9&YW}koQ_> ztn~tNnS0?pRq2QGxm8Rqg0_Vuk`gFF76?Fu3}vI}fN1eAN!oI#w6%=Vg*5{@$d=d< zrRKiH+PRDah(jzrI@DGA7(C0L&Jxxw!Q)KQf!xyySYzs}f{W}MeCYj=;Q${&zXkff z%J&5My?~pe1I`1nF$%L-RW!5QDV%Jtb1zsa)@C7?L~aEfEw7hFeetKDnR4<>LOeyL zX+GWV!OX7at^n)%UCcD2eWU4v48Anufm0A!>Q1zr&AJ=%m?k zk6>(fCpdelqSIcMCuQ7n4|gx)k=Kf~AS%a}iv>S!u%W`D%_l00_`0_GdUH8CcMwmZ zh5UjX*pA%9H?~cWWg?LRGFrk$wN9_#n;$=h z@V`iGK3#Qe;s@g{bS5m!!dDks5!@c^ z7T*C?f66iUSfPRtq1^cl&vX=JdrL)`StIXIqT$eHCP@BDAi8!w3&yT^Tun%Z%RVeQ znYtYu>GiC;s-FM2=M7gR6f>#K&x84fZ|(%a;?cvepocqxbQhL}XA=qefNe{()#6#` z%6qm%C!AsR4BS76{i*nhSf~J1dNLBpIX$hvX)w6?M;<~hoEURrLKs1ARo4=i`J^y##fOH4vC3v z=UkVG^@RkH0l-r3ZjY`y-o>=*Q>zp&_3|ZU5=2(P=>+7FC>CIn4vft}gkJgLWp!Gz zcv-UWdAh{gG6Cl&qdtYp)vgc_5MeSi{Cs+{~X-z)7Kj(DrL&a&@hyN1FH_&pIPJgX{WVgNI}8iu0H`d!XZkCoMx7^ z-d@Y}o`Etr?9{Z6VrR#7%RIiaKO_PvBJK$xJU@h)lz0m34DTUODJ1@EX)s0%_gVlK2@=cX8rwJcO-E@hTDfy^uT|vIIV*H^y~U+dR1Fo*&Z1mAJ}| z5mbqFiaNe0^ca=Tz&2?rp*5{g(&zc|TWCm^EDyjZi&Y3)@(4c7BwqBrjt@B_j4S**JG~( z2z7MADdx-K>%NX*25L{+f=j_(G+xX(l-XFDi;HV1g@ajgX)gd1y&JT4GZ3Lo4tNu} 
zSO)+t0@`m*6~h?`1utg{=bP55Mdu+nS^xxvRQt#|U>aLv+vpKd#c=g_GB??d<~%a8 zLiJQjMY^PN%SyXUuyI29AhfB^KNR7*g=_10ul+{QPtt=B0SzHUW25+0Li-(il^ep_ zB0Uh&F$+EhII?#T^0z#4bG}Dl?)J>cwYn`D{CCScZ*Y4b;tqd*i7Azs^YQw&(8fe}syx1-a@*DOpW)2uq;TeVT&HRuw@YBf)NMgCgX@!YIod5|E5 zWRW*RYVka73F~P#>HG%wts#2xxCd{SrPVf&X`BwlNp8hsYqp{l zN5W)ebQ4B<`=Fl$#H|QUrNMYQt>%>W>BV`|BaRUJ<156kOeX{^NV637z06wm-!dJX z|1EN8+ZveL(f^t6(EYkn9wTeo%Yf+rUQ-WRoUfV1mkLgkh1A#l2|{ssANYxwP*%$Q zDI|^r9(2|}@hte;)p*!kDn56R6>POrUe=H;qQt-t-g;=h(v>ajprI5~UJfBJVzGrd zgd+qTmco>U8@>=!AqBy~ajlh0Ktv9RUWOKu0~J5-a}21$kj-(o3m z>wT&!VGw`pU+^zhn7H_A8bzQ7pVdXBnKFVMP7zUa4D;6xDSjAwf-NP*Bw{Mu8K53m zOcr+jgq&^D7taS9tMa*wGiZ;LaNii=!^}^$piowbi<*5#XB=ynkXXLPME1pT&lPHz_dEV!wcnNspX7$`VSoi+N3jkZ;+7rtLNuKLBXkhm0Vr%1 zAdL%4I$z)F>j3(?jGyj|nosZ?$=Jh&od>guO$qk%)CNqz_coeVfh09JNDa@Tx_zLy zI!r%ugno9M(H9~DvkC5wvs+L~+4fV-8-cJlV0ttbr8D@>0{IOI>!6^bk);E>i(h7j z8oXK!F6t}PedNjDv;>dyxBC6dNEGd0qR=8n=BB5jBA5Eo9^f6?7w2gH-<2?0<4L0G z_xGTBuZI2SH+|2Q|3SCd8yi>{(En~3f2YJ6(pJlCIGqR!P6YN=28*hPpbjAm)yw6w z*d&&LcqYLK3-UPvGm>uDOIuz*y)9n@(H1NMziN7;FXTV5EOMFSPE}TQ;w4a~PUOlq z!j~hkNxrLCZy}kwn@|jT|iXj?v3DO)pzVt-k>X< zG=Lsr18Qy+F7}<+II5;@u5ip1wnLY67A!JYi$1nUNnhP_Qqpr<#4%amDXE?|YU|1u zh*KF|SDngvs=;(|ux6fqnG{J$+L*P)(zz9hvK8_b`7v#*=riGwA5X zWhF!Daul#|)um_zw0zu{HW60{J`xz(*2>yASGLLe8V>esS~zQ?Mtwg3yY{*3y{0f8 z?gF-!wy`4m!i~LhDg|~naZU|$|C;|J-ViO*TS#oH9n2nUl8K2_zi*M1xDD)ljs?h& zLUXhtjZbydmbT}vX&;0DhEA15%rqtn=@u~G9|jL53%1VcQ&0X7LynQ4s3GCvLsnwp zo~Bb2gt@&_i?7Y+>JxFw&)f1gI_(P~e&@+1uhnalrPO}^8rq5P9`fpPxe^+voqVA< z`1U$$)cNw5k6C2DepQM8c5!`iWySY)6&MJ9AJ8R?%a?@|y-&s|z|8~P3ky6XpBAo* zs9U0B-sj!NC}#q@p8-w1pNcY81vF2ng0PA-1uWB5y5)-I?eLH2u1fGk==^wz`)v~FAz8)V;?Stm>4W6806H?uIVHn$c*wNTca&1q@rV}E2-We zu6CimF;v7DkrdJ9tzxWb>qOKR(wpxi0pVx!{VF()EJagSQQ?lO4&H8|ae<_S$Un!G zY#VN@IpXKG>k6WHu!0d*AlEH%GX!PGG@F_jsdK&13}+wEpmeW$AhJWlQirVXR2soS zY&AnX=*AD1v??Sro9PYecCMTYn{aNQj_}D=8{ty7*OS~gh0#?2d6KDNhXQ@?c;g-& z2A}&=>lGQ^;4&!mrn{ZTTH0FwZd+N#WGCrlOI1039W&eXjM7~YQ$s0P*v}}9w*;Mx zwZMM!7^L{XXA-}!gcUw8pc4ZU);CZp8Xlr^>NbH2Pyl|`Uov5U*=*P;{@E34W;3y=VyDW<>Xh7#h7=rSiZaSjID#ju zo!`g&a%9PT)BEs2$C#{eHfP%QRhahpVJc=i$k}vw#QYP_mY!&z<3vl4bq`?nmDOtak2E$pPeLbp|JCQw8ku1Wm_-FLW}D+ zJ!CBw9AbgLG~TbzxaEf%krjPQ7_Tg5b+@2acjrZus|7OKJTweFI-N;3@+g&EPw<|h zH~y*YZ0^W|dyX5#M|JlNG}t*?B~H7U12kPT#fGxf^9c3(C*-p$9&XbKOB$Qoi_I@9 zixu7Trxz6s7}DS8P6-#yHzFzc+8}D!H(ZzvA3|hw5)c|h4bP*VVs(e~SCMVSWi+`$ zorWM4-mJ0aI3v;DuH#Z42?{$*ty~D z)QBcz=w2K(9ixtYVG13MLZY6-qB)y8SP{l+AO zpK|@ASz38jFjn2x96)sxkVAt(>{3C58bBG3%B%|MM| zl*zA!58f^Hb$xK|D-TSO&8+qg-`YH|Ui+j5t!O(3fJsDWR%tsH;n8U@dI{~YD4Ns} z6OUrbJu)@6Kv~Iv2$HhsX_X;c#(G0VN*%x|%UyIfq3qPFP>KK#)-PI(_ZzF>F7Oc8 zWk1&~U6yiJ9Ni)g6kKobH3Oux8M_gGfl=pblbIUz74i$$)=hXZ2>ODDnxn?EV2zna z{~!*Z5b0uYpKh;CUMR8ha=VD|3&k*3A~e#x@9VUnzl;q8j0*VcV~RgH_+M1#zsvvq zwBqmjf0C%bisgUF)_ae?_5bAU|E~Tg0r&6S>%I^6xB9>Fu7B76vx@(Vru$0_-o5J| z-T#ZU`@8a=!1&+Ct^@r~;{&NU_M{0k6#GlUbYrXv?tN8!#>XVlSdp9ir0QG(dz0>E?gul-I E2gmPX7XSbN From cba51fd60e21fa39ebb4bd3826c9c151c621e739 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 18:30:53 -0800 Subject: [PATCH 05/10] update setup recommend --- solo_server/ensemble.yaml | 3 +- solo_server/main.py | 311 ++++++++++++++++++++++---------------- 2 files changed, 186 insertions(+), 128 deletions(-) diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml index 61f0516..49fbede 100644 --- a/solo_server/ensemble.yaml +++ b/solo_server/ensemble.yaml @@ -5,5 +5,6 @@ hardware: cpu_model: Intel i7 gpu_memory: 4 memory_gb: 16 +model_choice: null module_pack: pro -selected_model: microsoft/phi-2 +selected_model: microsoft/phi-4 diff --git 
a/solo_server/main.py b/solo_server/main.py index 72c73b4..e3cb6e7 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -7,19 +7,40 @@ from rich.panel import Panel from rich.theme import Theme from rich import box -from litgpt import LLM # Requires: pip install 'litgpt[all]' -app = typer.Typer() +app = typer.Typer(help="Solo Server Setup CLI\nA polished CLI for hardware detection, model initialization, and advanced module loading.") -# Define a custom neon blue theme -solo_theme = Theme({ - "info": "bold bright_blue", - "warning": "bold magenta", - "success": "bold bright_blue", +# Define a Google-inspired theme (blue, red, yellow, green) +google_theme = Theme({ + "header": "bold #4285F4", # Google Blue + "info": "bold #4285F4", # Google Blue + "warning": "bold #DB4437", # Google Red + "success": "bold #0F9D58", # Google Green + "prompt": "bold #F4B400", # Google Yellow "panel.border": "bright_blue", - "panel.title": "bright_cyan" + "panel.title": "bold white" }) -console = Console(theme=solo_theme) +console = Console(theme=google_theme) + +# Model options mapping (based on your table) +# Here we assume the "smallest fastest" option for each family: +MODEL_OPTIONS = { + "llama3": "meta-llama/Llama-3.1-1B-Instruct", # Smallest variant from Llama 3 family + "code_llama": "meta-llama/Code-Llama-7B", # Smallest variant for Code Llama + "codegemma": "google/CodeGemma-7B", # Only one variant for CodeGemma + "gemma2": "google/Gemma2-2B", # Smallest variant for Gemma 2 + "phi4": "microsoft/phi-4", # Only one option for Phi 4 (14B) + "qwen2.5": "qwen2.5/0.5B", # Smallest variant for Qwen2.5 + "qwen2.5_coder": "qwen2.5-coder/0.5B", # Smallest variant for Qwen2.5 Coder + "r1_distill_llama": "deepseek-ai/R1-Distill-Llama-8B" # Smallest variant for R1 Distill Llama +} + +def print_banner(): + """Display a header banner for the Solo Server CLI.""" + banner_text = """ + + """ + console.print(Panel(banner_text, style="header", border_style="panel.border", title="SOLO SERVER INIT", box=box.DOUBLE)) def detect_hardware(): """ @@ -42,16 +63,76 @@ def get_hardware_category(memory_gb: float) -> str: else: return "Maestro" -def build_docker_ensemble(module_pack: str): +def auto_select_model(hardware_category: str) -> str: + """ + Auto-select a default model based on hardware category. + For each situation, we recommend the smallest and fastest model available. + """ + mapping = { + "Fresh Adopter": MODEL_OPTIONS["llama3"], + "Mid Range": MODEL_OPTIONS["code_llama"], + "High Performance": MODEL_OPTIONS["phi4"], + "Maestro": MODEL_OPTIONS["r1_distill_llama"] + } + return mapping.get(hardware_category, MODEL_OPTIONS["llama3"]) + +def simulate_model_download(selected_model: str, sleep_time: int = 3): + """ + Simulate model download with a delay. + """ + with console.status(f"[info]Downloading model {selected_model}...[/info]", spinner="dots"): + time.sleep(sleep_time) # Simulate download delay + return f"[success]Model {selected_model} download complete.[/success]" + +def prompt_core_initialization(confirm_fn=typer.confirm) -> bool: + """ + Ask user to confirm core initialization. 
+ """ + init_prompt = ( + "Continue to solo core initialization?\n" + "Yes: Proceed with full initialization and model setup\n" + "No: Exit setup" + ) + console.print( + Panel(init_prompt, title="Core Initialization", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) + ) + return confirm_fn("", default=True) + +def prompt_advanced_modules(confirm_fn=typer.confirm, prompt_fn=typer.prompt) -> (bool, str): + """ + Ask user if they want to load advanced modules and select module pack if yes. + Returns a tuple (advanced_modules, module_pack) + """ + adv_prompt = ( + "Load advanced modules?\n" + "Yes: Load additional functionalities and module packs\n" + "No: Skip advanced modules" + ) + console.print( + Panel(adv_prompt, title="Advanced Modules", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) + ) + advanced_modules = confirm_fn("", default=True) + module_pack = None + if advanced_modules: + module_pack_info = ( + "Choose module pack:\n" + "pro - Pro Pack: RAG, LangChain, Transformers\n" + "industrial - Industrial Pack: PyTorch, Tensorflow, vLLM\n" + "robotics - Robotics Pack: ROS, LeRobot, OpenEMMA\n" + "custom ensemble - Custom Ensemble: Additional containers\n" + "Enter your choice:" + ) + console.print( + Panel(module_pack_info, title="Module Pack Options", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) + ) + module_pack = prompt_fn("", type=click.Choice(["pro", "industrial", "robotics", "custom ensemble"], case_sensitive=False), default="pro") + return advanced_modules, module_pack + +def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): """ Build an ensemble of Docker images for the selected module pack. - The Dockerfiles are organized in subfolders within the "containers" folder. - - Adjust this dictionary to match the folders in your "containers/" directory - and how you want them grouped by module pack. """ docker_modules = { - # Example grouping (adjust as needed): "pro": [ "rag", "langchain", @@ -67,7 +148,6 @@ def build_docker_ensemble(module_pack: str): "LeRobot", "OpenEMMA" ], - # You can place additional folders here for a "custom ensemble" "custom ensemble": [ "Browser Use", "Computer Use", @@ -78,24 +158,17 @@ def build_docker_ensemble(module_pack: str): "llama-index" ] } - modules = docker_modules.get(module_pack.lower(), []) if not modules: - console.print(f"[magenta]No modules found for the '{module_pack}' pack. Adjust your dictionary as needed.[/magenta]") + console.print(f"[warning]No modules found for the '{module_pack}' pack. Adjust your dictionary as needed.[/warning]") return - + for module in modules: - console.print(f"[bright_blue]Building Docker image for module:[/bright_blue] {module}") - - # Replace spaces in the module name when creating the image tag + console.print(f"[info]Building Docker image for module:[/info] {module}") image_tag = module.lower().replace(' ', '-') - - # If your folder name has spaces, you may need to quote or escape them. - # Here we assume your OS can handle the direct string (Linux usually can with a directory rename). 
build_path = f"./containers/{module}" - try: - subprocess.run( + run_subprocess_fn( [ "docker", "build", @@ -105,149 +178,133 @@ def build_docker_ensemble(module_pack: str): check=True, capture_output=True ) - console.print(f"[bright_cyan]Successfully built image for:[/bright_cyan] {module}") + console.print(f"[success]Successfully built image for:[/success] {module}") except subprocess.CalledProcessError as e: console.print(f"[warning]Docker build failed for module {module}: {e}[/warning]") +def save_setup_info(setup_info: dict, filename: str = "ensemble.yaml"): + """ + Save setup information to a YAML file. + """ + with open(filename, "w") as f: + yaml.dump(setup_info, f) + return f"[success]Setup information saved to {filename}.[/success]" + +def serve_model(model: str, port: int = 5070, run_subprocess_fn=subprocess.run) -> str: + """ + Serve the model using the LitGPT CLI syntax. + Example: litgpt serve meta-llama/Llama-3.1-1B-Instruct --port 5070 + """ + try: + cmd = ["litgpt", "serve", model, "--port", str(port)] + run_subprocess_fn(cmd, check=True) + return f"[success]Server started on port {port} with model: {model}[/success]" + except subprocess.CalledProcessError as e: + return f"[warning]Failed to start server: {e}[/warning]" + +def get_hardware_info(): + """ + Get hardware information and categorization. + """ + cpu_model, cpu_cores, memory_gb, gpu_memory = detect_hardware() + hardware_category = get_hardware_category(memory_gb) + hardware_info = { + "cpu_model": cpu_model, + "cpu_cores": cpu_cores, + "memory_gb": memory_gb, + "gpu_memory": gpu_memory, + "category": hardware_category + } + return hardware_info + @app.command() -def setup(): +def setup( + model_choice: str = typer.Option( + None, + "--model", + "-m", + help="Optional model choice. 
Options: " + ", ".join(MODEL_OPTIONS.keys()) + ) +): + """Run the full solo server setup.""" console.print("\n") - + print_banner() + console.print("\n") + # Step 1: Hardware Detection & Categorization - typer.echo("Detecting hardware...") - cpu_model, cpu_cores, memory_gb, gpu_memory = detect_hardware() - hardware_category = get_hardware_category(memory_gb) - hardware_info = ( - f"CPU: {cpu_model} ({cpu_cores} cores)\n" - f"Memory: {memory_gb} GB\n" - f"GPU Memory: {gpu_memory} GB\n" - f"Category: {hardware_category}" + typer.echo("[info]Detecting hardware...[/info]") + hardware_info = get_hardware_info() + hardware_info_str = ( + f"CPU: {hardware_info['cpu_model']} ({hardware_info['cpu_cores']} cores)\n" + f"Memory: {hardware_info['memory_gb']} GB\n" + f"GPU Memory: {hardware_info['gpu_memory']} GB\n" + f"Category: {hardware_info['category']}" ) console.print( - Panel(hardware_info, title="Hardware Info", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + Panel(hardware_info_str, title="Hardware Info", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) ) # Step 2: Core Initialization Prompt - init_prompt = ( - "Continue to solo core initialization?\n" - "Yes: Proceed with full initialization and model setup\n" - "No: Exit setup" - ) - console.print( - Panel(init_prompt, title="Core Initialization", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) - ) - if not typer.confirm("", default=True): - typer.echo("Exiting setup.") + if not prompt_core_initialization(): + typer.echo("[warning]Exiting setup.[/warning]") raise typer.Exit() console.print("\n") # Step 3: Model Selection & Download Simulation - model_map = { - "Fresh Adopter": "SmolLM2-135M", - "Mid Range": "Qwen2.5-0.5B", - "High Performance": "microsoft/phi-2", - "Maestro": "Deepseek-r1" - } - selected_model = model_map.get(hardware_category, "SmolLM2-135M") - with console.status(f"Downloading model {selected_model}...", spinner="dots", spinner_style="bold bright_blue"): - time.sleep(3) # Simulate download delay - typer.echo(f"Model {selected_model} download complete.") + if model_choice: + # Use user provided model option if valid + selected_model = MODEL_OPTIONS.get(model_choice.lower()) + if not selected_model: + typer.echo(f"[warning]Invalid model choice: {model_choice}. 
Falling back to auto-selection.[/warning]") + selected_model = auto_select_model(hardware_info['category']) + else: + selected_model = auto_select_model(hardware_info['category']) + + download_message = simulate_model_download(selected_model) + typer.echo(download_message) console.print("\n") # Step 4: Advanced Modules Prompt - adv_prompt = ( - "Load advanced modules?\n" - "Yes: Load additional functionalities and module packs\n" - "No: Skip advanced modules" - ) - console.print( - Panel(adv_prompt, title="Advanced Modules", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) - ) - advanced_modules = typer.confirm("", default=True) - module_pack = None + advanced_modules, module_pack = prompt_advanced_modules() if advanced_modules: - module_pack_info = ( - "Choose module pack:\n" - "pro - Pro Pack: RAG, LangChain, Transformers\n" - "industrial - Industrial Pack: PyTorch, Tensorflow, vLLM\n" - "robotics - Robotics Pack: ROS, LeRobot, OpenEMMA\n" - "custom ensemble - Custom Ensemble: A variety of additional containers\n" - "Enter your choice:" - ) - console.print( - Panel(module_pack_info, title="Module Pack Options", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) - ) - module_pack = typer.prompt("", type=click.Choice(["pro", "industrial", "robotics", "custom ensemble"], case_sensitive=False), default="pro") - typer.echo(f"Module pack selected: {module_pack}") + typer.echo(f"[info]Module pack selected: {module_pack}[/info]") else: - typer.echo("Skipping advanced modules.") + typer.echo("[info]Skipping advanced modules.[/info]") console.print("\n") # Step 5: Save Setup Information to ensemble.yaml setup_info = { - "hardware": { - "cpu_model": cpu_model, - "cpu_cores": cpu_cores, - "memory_gb": memory_gb, - "gpu_memory": gpu_memory, - "category": hardware_category - }, + "hardware": hardware_info, "selected_model": selected_model, "advanced_modules": advanced_modules, - "module_pack": module_pack + "module_pack": module_pack, + "model_choice": model_choice } - with open("ensemble.yaml", "w") as f: - yaml.dump(setup_info, f) - typer.echo("Setup information saved to ensemble.yaml.") + save_message = save_setup_info(setup_info) + typer.echo(save_message) - # Step 6: If advanced modules enabled, start Docker ensemble builds + # Step 6: Docker Ensemble Build for Advanced Modules if advanced_modules and module_pack: console.print( - Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) ) build_docker_ensemble(module_pack) console.print("\n") console.print( - Panel("Solo core initialization complete!", title="Setup Complete", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) + Panel("Solo core initialization complete!", title="Setup Complete", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) ) console.print("\n") - # Step 7: Load the LLM using litgpt - console.print( - Panel(f"Loading LLM model: {selected_model}", title="LLM Load", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) - ) - try: - llm = LLM.load(selected_model) - typer.echo("LLM loaded successfully.") - except Exception as e: - typer.echo(f"Failed to load LLM: {e}") - raise typer.Exit() - - # Step 8: Start the server on port 5070 + # Step 7: Serve the Model using LitGPT CLI syntax console.print( - Panel(f"Starting server on port 5070 with model: 
{selected_model}", title="Server", border_style="bright_blue", box=box.ROUNDED, padding=(1, 2)) - ) - try: - llm.serve(port=5070) - except Exception as e: - typer.echo(f"Failed to start server: {e}") - - # Step 9: Optionally Generate Text - prompt_text = typer.prompt( - "Enter a prompt to generate text (default: 'Fix the spelling: Every fall, the familly goes to the mountains.')", - default="Fix the spelling: Every fall, the familly goes to the mountains." + Panel(f"Starting server on port 5070 with model: {selected_model}", title="Server", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) ) - typer.echo("Generating text...") - try: - generated_text = llm.generate(prompt_text) - typer.echo("\nGenerated text:") - typer.echo(generated_text) - except Exception as e: - typer.echo(f"Failed to generate text: {e}") + server_message = serve_model(selected_model, port=5070) + typer.echo(server_message) if __name__ == "__main__": app() From e2c2797190f720b320fbaf7d74572e224b0cb392 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 19:07:01 -0800 Subject: [PATCH 06/10] add updated containers --- solo_server/ensemble.yaml | 12 ++- solo_server/main.py | 180 +++++++++++++++++++------------------- 2 files changed, 101 insertions(+), 91 deletions(-) diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml index 49fbede..eb52ced 100644 --- a/solo_server/ensemble.yaml +++ b/solo_server/ensemble.yaml @@ -1,10 +1,20 @@ advanced_modules: true +checkpoint_dir: checkpoints/HuggingFaceTB/SmolLM2-1.7B-Instruct +devices: 1 hardware: category: High Performance cpu_cores: 8 cpu_model: Intel i7 gpu_memory: 4 memory_gb: 16 +max_new_tokens: 50 model_choice: null module_pack: pro -selected_model: microsoft/phi-4 +port: 5070 +precision: null +quantize: null +selected_model: HuggingFaceTB/SmolLM2-1.7B-Instruct +stream: false +temperature: 0.8 +top_k: 50 +top_p: 1.0 diff --git a/solo_server/main.py b/solo_server/main.py index e3cb6e7..b501371 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -1,8 +1,10 @@ import time import subprocess +import socket import typer import click import yaml +from pathlib import Path from rich.console import Console from rich.panel import Panel from rich.theme import Theme @@ -10,35 +12,29 @@ app = typer.Typer(help="Solo Server Setup CLI\nA polished CLI for hardware detection, model initialization, and advanced module loading.") -# Define a Google-inspired theme (blue, red, yellow, green) +# Google-inspired theme google_theme = Theme({ - "header": "bold #4285F4", # Google Blue - "info": "bold #4285F4", # Google Blue - "warning": "bold #DB4437", # Google Red - "success": "bold #0F9D58", # Google Green - "prompt": "bold #F4B400", # Google Yellow + "header": "bold #4285F4", + "info": "bold #4285F4", + "warning": "bold #DB4437", + "success": "bold #0F9D58", "panel.border": "bright_blue", "panel.title": "bold white" }) console = Console(theme=google_theme) -# Model options mapping (based on your table) -# Here we assume the "smallest fastest" option for each family: -MODEL_OPTIONS = { - "llama3": "meta-llama/Llama-3.1-1B-Instruct", # Smallest variant from Llama 3 family - "code_llama": "meta-llama/Code-Llama-7B", # Smallest variant for Code Llama - "codegemma": "google/CodeGemma-7B", # Only one variant for CodeGemma - "gemma2": "google/Gemma2-2B", # Smallest variant for Gemma 2 - "phi4": "microsoft/phi-4", # Only one option for Phi 4 (14B) - "qwen2.5": "qwen2.5/0.5B", # Smallest variant for Qwen2.5 - "qwen2.5_coder": "qwen2.5-coder/0.5B", # Smallest variant 
for Qwen2.5 Coder - "r1_distill_llama": "deepseek-ai/R1-Distill-Llama-8B" # Smallest variant for R1 Distill Llama -} +# Hard-coded model and starting port +MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct" +START_PORT = 5070 def print_banner(): """Display a header banner for the Solo Server CLI.""" banner_text = """ - + ___ _ __ __ _ + / _ \(_)___ ___ / /___ / /_(_) + / , _/ / _ \/ -_) / / __/ / __/ / + /_/|_/_/ .__/\__/ /_/\__/ \__/_/ + /_/ """ console.print(Panel(banner_text, style="header", border_style="panel.border", title="SOLO SERVER INIT", box=box.DOUBLE)) @@ -63,26 +59,13 @@ def get_hardware_category(memory_gb: float) -> str: else: return "Maestro" -def auto_select_model(hardware_category: str) -> str: - """ - Auto-select a default model based on hardware category. - For each situation, we recommend the smallest and fastest model available. - """ - mapping = { - "Fresh Adopter": MODEL_OPTIONS["llama3"], - "Mid Range": MODEL_OPTIONS["code_llama"], - "High Performance": MODEL_OPTIONS["phi4"], - "Maestro": MODEL_OPTIONS["r1_distill_llama"] - } - return mapping.get(hardware_category, MODEL_OPTIONS["llama3"]) - -def simulate_model_download(selected_model: str, sleep_time: int = 3): +def simulate_model_download(model: str, sleep_time: int = 3) -> str: """ Simulate model download with a delay. """ - with console.status(f"[info]Downloading model {selected_model}...[/info]", spinner="dots"): - time.sleep(sleep_time) # Simulate download delay - return f"[success]Model {selected_model} download complete.[/success]" + with console.status(f"[info]Downloading model {model}...[/info]", spinner="dots"): + time.sleep(sleep_time) + return f"[success]Model {model} download complete.[/success]" def prompt_core_initialization(confirm_fn=typer.confirm) -> bool: """ @@ -131,6 +114,8 @@ def prompt_advanced_modules(confirm_fn=typer.confirm, prompt_fn=typer.prompt) -> def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): """ Build an ensemble of Docker images for the selected module pack. + Checks if the Dockerfile directory exists. + Adjusted to use the path: commands/containers/ """ docker_modules = { "pro": [ @@ -164,17 +149,16 @@ def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): return for module in modules: + # Update the build path to use the relative path from main.py. + build_path = Path("commands") / "containers" / module + if not build_path.exists(): + console.print(f"[warning]Path {build_path} does not exist. Skipping module {module}.[/warning]") + continue console.print(f"[info]Building Docker image for module:[/info] {module}") image_tag = module.lower().replace(' ', '-') - build_path = f"./containers/{module}" try: run_subprocess_fn( - [ - "docker", - "build", - "-t", f"ensemble/{image_tag}", - build_path - ], + ["docker", "build", "-t", f"ensemble/{image_tag}", str(build_path)], check=True, capture_output=True ) @@ -182,7 +166,7 @@ def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): except subprocess.CalledProcessError as e: console.print(f"[warning]Docker build failed for module {module}: {e}[/warning]") -def save_setup_info(setup_info: dict, filename: str = "ensemble.yaml"): +def save_setup_info(setup_info: dict, filename: str = "ensemble.yaml") -> str: """ Save setup information to a YAML file. 
""" @@ -190,40 +174,62 @@ def save_setup_info(setup_info: dict, filename: str = "ensemble.yaml"): yaml.dump(setup_info, f) return f"[success]Setup information saved to {filename}.[/success]" -def serve_model(model: str, port: int = 5070, run_subprocess_fn=subprocess.run) -> str: +def get_available_port(start_port: int) -> int: + """ + Return the first available port starting from start_port. + """ + port = start_port + while True: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + try: + s.bind(("", port)) + return port + except OSError: + port += 1 + +def serve_model(model: str, port: int, run_subprocess_fn=subprocess.run) -> (str, int): """ Serve the model using the LitGPT CLI syntax. - Example: litgpt serve meta-llama/Llama-3.1-1B-Instruct --port 5070 + If the given port is in use, automatically increment to the next available port. + Returns a tuple of the success message and the port used. """ + available_port = get_available_port(port) try: - cmd = ["litgpt", "serve", model, "--port", str(port)] - run_subprocess_fn(cmd, check=True) - return f"[success]Server started on port {port} with model: {model}[/success]" + cmd = ["litgpt", "serve", model, "--port", str(available_port)] + run_subprocess_fn(cmd, check=True, capture_output=True, text=True) + success_msg = f"[success]Server started on port {available_port} with model: {model}[/success]" + # Print a sample curl command for testing. + test_curl = f"curl http://localhost:{available_port}/" + console.print(f"[info]You can test the server with: {test_curl}[/info]") + return success_msg, available_port except subprocess.CalledProcessError as e: - return f"[warning]Failed to start server: {e}[/warning]" + error_output = e.stderr.strip() if e.stderr else str(e) + console.print(f"ERROR: {error_output}") + return f"[warning]Failed to start server: {e}[/warning]", available_port -def get_hardware_info(): +def get_hardware_info() -> dict: """ Get hardware information and categorization. """ cpu_model, cpu_cores, memory_gb, gpu_memory = detect_hardware() hardware_category = get_hardware_category(memory_gb) - hardware_info = { + return { "cpu_model": cpu_model, "cpu_cores": cpu_cores, "memory_gb": memory_gb, "gpu_memory": gpu_memory, "category": hardware_category } - return hardware_info @app.command() def setup( + # Although the original flow allowed a model_choice, + # we now always use HuggingFaceTB/SmolLM2-1.7B-Instruct. model_choice: str = typer.Option( None, "--model", "-m", - help="Optional model choice. 
Options: " + ", ".join(MODEL_OPTIONS.keys()) + help="Optional model choice (ignored in this setup; always uses HuggingFaceTB/SmolLM2-1.7B-Instruct)" ) ): """Run the full solo server setup.""" @@ -232,7 +238,7 @@ def setup( console.print("\n") # Step 1: Hardware Detection & Categorization - typer.echo("[info]Detecting hardware...[/info]") + console.print("[info]Detecting hardware...[/info]") hardware_info = get_hardware_info() hardware_info_str = ( f"CPU: {hardware_info['cpu_model']} ({hardware_info['cpu_cores']} cores)\n" @@ -240,71 +246,65 @@ def setup( f"GPU Memory: {hardware_info['gpu_memory']} GB\n" f"Category: {hardware_info['category']}" ) - console.print( - Panel(hardware_info_str, title="Hardware Info", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) + console.print(Panel(hardware_info_str, title="Hardware Info", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) # Step 2: Core Initialization Prompt if not prompt_core_initialization(): - typer.echo("[warning]Exiting setup.[/warning]") + console.print("[warning]Exiting setup.[/warning]") raise typer.Exit() console.print("\n") - # Step 3: Model Selection & Download Simulation - if model_choice: - # Use user provided model option if valid - selected_model = MODEL_OPTIONS.get(model_choice.lower()) - if not selected_model: - typer.echo(f"[warning]Invalid model choice: {model_choice}. Falling back to auto-selection.[/warning]") - selected_model = auto_select_model(hardware_info['category']) - else: - selected_model = auto_select_model(hardware_info['category']) - - download_message = simulate_model_download(selected_model) - typer.echo(download_message) + # Step 3: Model Download Simulation (always uses the specified model) + download_message = simulate_model_download(MODEL) + console.print(download_message) console.print("\n") - # Step 4: Advanced Modules Prompt + # Step 4: Advanced Modules Prompt (optional) advanced_modules, module_pack = prompt_advanced_modules() if advanced_modules: - typer.echo(f"[info]Module pack selected: {module_pack}[/info]") + console.print(f"[info]Module pack selected: {module_pack}[/info]") else: - typer.echo("[info]Skipping advanced modules.[/info]") + console.print("[info]Skipping advanced modules.[/info]") console.print("\n") - # Step 5: Save Setup Information to ensemble.yaml + # Step 5: Save Setup Information to YAML and print config details setup_info = { + "checkpoint_dir": str(Path("checkpoints") / MODEL), + "devices": 1, + "max_new_tokens": 50, + "port": START_PORT, # initial port, actual port may change + "precision": None, + "quantize": None, + "stream": False, + "temperature": 0.8, + "top_k": 50, + "top_p": 1.0, + "selected_model": MODEL, "hardware": hardware_info, - "selected_model": selected_model, "advanced_modules": advanced_modules, "module_pack": module_pack, "model_choice": model_choice } save_message = save_setup_info(setup_info) - typer.echo(save_message) + console.print(save_message) + console.print(setup_info) - # Step 6: Docker Ensemble Build for Advanced Modules + # Step 6: Docker Ensemble Build for Advanced Modules (if enabled) if advanced_modules and module_pack: - console.print( - Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) + console.print(Panel("Starting Docker builds for advanced modules...", title="Docker Ensemble", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) build_docker_ensemble(module_pack) console.print("\n") - console.print( 
- Panel("Solo core initialization complete!", title="Setup Complete", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) + console.print(Panel("Solo core initialization complete!", title="Setup Complete", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) console.print("\n") - # Step 7: Serve the Model using LitGPT CLI syntax - console.print( - Panel(f"Starting server on port 5070 with model: {selected_model}", title="Server", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) - server_message = serve_model(selected_model, port=5070) - typer.echo(server_message) + # Step 7: Serve the Model using LitGPT CLI syntax and capture errors gracefully + console.print(Panel(f"Starting server with model: {MODEL}", title="Server", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + server_message, used_port = serve_model(MODEL, port=START_PORT) + console.print(server_message) if __name__ == "__main__": app() From b23fb75f799d9800ae2dff4665deaf3cf1c8d039 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 19:21:50 -0800 Subject: [PATCH 07/10] changes to docker assemblage --- solo_server/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solo_server/main.py b/solo_server/main.py index b501371..6bf1e6a 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -158,7 +158,7 @@ def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): image_tag = module.lower().replace(' ', '-') try: run_subprocess_fn( - ["docker", "build", "-t", f"ensemble/{image_tag}", str(build_path)], + ["docker", "build", "-t", f"containers/{image_tag}", str(build_path)], check=True, capture_output=True ) From b784628f0fceb0580203d9c2a6d637485448efa2 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 19:33:46 -0800 Subject: [PATCH 08/10] add ensemble categories --- solo_server/ensemble.yaml | 2 +- solo_server/main.py | 116 +++++++++++++++++++------------------- 2 files changed, 60 insertions(+), 58 deletions(-) diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml index eb52ced..dc0979f 100644 --- a/solo_server/ensemble.yaml +++ b/solo_server/ensemble.yaml @@ -9,7 +9,7 @@ hardware: memory_gb: 16 max_new_tokens: 50 model_choice: null -module_pack: pro +module_pack: lean ensemble port: 5070 precision: null quantize: null diff --git a/solo_server/main.py b/solo_server/main.py index 6bf1e6a..55e0405 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -5,6 +5,7 @@ import click import yaml from pathlib import Path +from tqdm import tqdm from rich.console import Console from rich.panel import Panel from rich.theme import Theme @@ -61,10 +62,11 @@ def get_hardware_category(memory_gb: float) -> str: def simulate_model_download(model: str, sleep_time: int = 3) -> str: """ - Simulate model download with a delay. + Simulate model download with a progress bar. + (sleep_time is in seconds; e.g., 3 sec ~ 0.05 mins) """ - with console.status(f"[info]Downloading model {model}...[/info]", spinner="dots"): - time.sleep(sleep_time) + for _ in tqdm(range(sleep_time), desc="Downloading model (est. 
{:.2f} mins)".format(sleep_time/60), unit="sec", total=sleep_time): + time.sleep(1) return f"[success]Model {model} download complete.[/success]" def prompt_core_initialization(confirm_fn=typer.confirm) -> bool: @@ -76,80 +78,77 @@ def prompt_core_initialization(confirm_fn=typer.confirm) -> bool: "Yes: Proceed with full initialization and model setup\n" "No: Exit setup" ) - console.print( - Panel(init_prompt, title="Core Initialization", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) + console.print(Panel(init_prompt, title="Core Initialization", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) return confirm_fn("", default=True) +def test_downloaded_model(model: str, run_subprocess_fn=subprocess.run) -> str: + """ + Prompt the user for a test prompt (defaulting to 'solo @@ test') and use the LitGPT CLI + to generate sample output from the downloaded model. + A progress bar shows the testing duration. + """ + test_prompt = typer.prompt("Enter test prompt", default="solo @@ test") + console.print(f"[info]Testing model {model} with prompt: '{test_prompt}'[/info]") + for _ in tqdm(range(5), desc="Testing model (est. 0.08 mins)", unit="sec", total=5): + time.sleep(1) + try: + # Assuming the LitGPT CLI provides a generate command. + cmd = ["litgpt", "generate", model, "--prompt", test_prompt] + result = run_subprocess_fn(cmd, check=True, capture_output=True, text=True) + output = result.stdout.strip() + console.print(f"[success]Test generation output:[/success]\n{output}") + return output + except subprocess.CalledProcessError as e: + error_output = e.stderr.strip() if e.stderr else str(e) + console.print(f"[warning]Test generation failed: {error_output}[/warning]") + return "" + def prompt_advanced_modules(confirm_fn=typer.confirm, prompt_fn=typer.prompt) -> (bool, str): """ - Ask user if they want to load advanced modules and select module pack if yes. + Ask user if they want to load advanced modules and select a vertical. + New verticals include: secure enterprise, healthcare, robotics, and lean ensemble. 
Returns a tuple (advanced_modules, module_pack) """ adv_prompt = ( "Load advanced modules?\n" - "Yes: Load additional functionalities and module packs\n" + "Yes: Load additional functionalities for a vertical\n" "No: Skip advanced modules" ) - console.print( - Panel(adv_prompt, title="Advanced Modules", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) + console.print(Panel(adv_prompt, title="Advanced Modules", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) advanced_modules = confirm_fn("", default=True) module_pack = None if advanced_modules: module_pack_info = ( - "Choose module pack:\n" - "pro - Pro Pack: RAG, LangChain, Transformers\n" - "industrial - Industrial Pack: PyTorch, Tensorflow, vLLM\n" - "robotics - Robotics Pack: ROS, LeRobot, OpenEMMA\n" - "custom ensemble - Custom Ensemble: Additional containers\n" + "Choose advanced vertical:\n" + "secure enterprise - Modules for security and compliance\n" + "healthcare - Modules for healthcare applications\n" + "robotics - Modules for robotics integration\n" + "lean ensemble - A lean set of general modules\n" "Enter your choice:" ) - console.print( - Panel(module_pack_info, title="Module Pack Options", border_style="panel.border", box=box.ROUNDED, padding=(1, 2)) - ) - module_pack = prompt_fn("", type=click.Choice(["pro", "industrial", "robotics", "custom ensemble"], case_sensitive=False), default="pro") + console.print(Panel(module_pack_info, title="Vertical Options", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + module_pack = prompt_fn("", type=click.Choice(["secure enterprise", "healthcare", "robotics", "lean ensemble"], case_sensitive=False), default="lean ensemble") return advanced_modules, module_pack def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): """ - Build an ensemble of Docker images for the selected module pack. - Checks if the Dockerfile directory exists. - Adjusted to use the path: commands/containers/ + Build an ensemble of Docker images for the selected vertical. + Uses the path: commands/containers/ (relative to main.py). + A tqdm progress bar shows the estimated duration. """ - docker_modules = { - "pro": [ - "rag", - "langchain", - "Transformers" - ], - "industrial": [ - "PyTorch", - "Tensorflow", - "vLLM" - ], - "robotics": [ - "ROS", - "LeRobot", - "OpenEMMA" - ], - "custom ensemble": [ - "Browser Use", - "Computer Use", - "Cosmos", - "homeassistant-core", - "JAX", - "LITA", - "llama-index" - ] + # New advanced module packs for different verticals + advanced_module_packs = { + "secure enterprise": ["auth", "data-encryption", "audit-log"], + "healthcare": ["hl7", "fhir-connector", "secure-patient"], + "robotics": ["ros", "le-robot", "robotics-core"], + "lean ensemble": ["microservice", "edge-ai", "light-transformers"] } - modules = docker_modules.get(module_pack.lower(), []) + modules = advanced_module_packs.get(module_pack.lower(), []) if not modules: - console.print(f"[warning]No modules found for the '{module_pack}' pack. Adjust your dictionary as needed.[/warning]") + console.print(f"[warning]No modules found for vertical '{module_pack}'.[/warning]") return - for module in modules: - # Update the build path to use the relative path from main.py. + for module in tqdm(modules, desc="Building Docker images (est. 2 mins/module)", unit="module", total=len(modules)): build_path = Path("commands") / "containers" / module if not build_path.exists(): console.print(f"[warning]Path {build_path} does not exist. 
Skipping module {module}.[/warning]") @@ -158,7 +157,7 @@ def build_docker_ensemble(module_pack: str, run_subprocess_fn=subprocess.run): image_tag = module.lower().replace(' ', '-') try: run_subprocess_fn( - ["docker", "build", "-t", f"containers/{image_tag}", str(build_path)], + ["docker", "build", "-t", f"ensemble/{image_tag}", str(build_path)], check=True, capture_output=True ) @@ -198,7 +197,6 @@ def serve_model(model: str, port: int, run_subprocess_fn=subprocess.run) -> (str cmd = ["litgpt", "serve", model, "--port", str(available_port)] run_subprocess_fn(cmd, check=True, capture_output=True, text=True) success_msg = f"[success]Server started on port {available_port} with model: {model}[/success]" - # Print a sample curl command for testing. test_curl = f"curl http://localhost:{available_port}/" console.print(f"[info]You can test the server with: {test_curl}[/info]") return success_msg, available_port @@ -223,8 +221,6 @@ def get_hardware_info() -> dict: @app.command() def setup( - # Although the original flow allowed a model_choice, - # we now always use HuggingFaceTB/SmolLM2-1.7B-Instruct. model_choice: str = typer.Option( None, "--model", @@ -261,10 +257,16 @@ def setup( console.print("\n") + # NEW STEP: Test the downloaded model using the solo @@ structure + console.print(Panel("Testing downloaded model...", title="Test Model", border_style="panel.border", box=box.ROUNDED, padding=(1, 2))) + test_output = test_downloaded_model(MODEL) + + console.print("\n") + # Step 4: Advanced Modules Prompt (optional) advanced_modules, module_pack = prompt_advanced_modules() if advanced_modules: - console.print(f"[info]Module pack selected: {module_pack}[/info]") + console.print(f"[info]Vertical selected: {module_pack}[/info]") else: console.print("[info]Skipping advanced modules.[/info]") From adbc81cadf0b4c8bb6d6aa46d4bb2294fe8087cf Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 19:47:05 -0800 Subject: [PATCH 09/10] over complicated setup --- solo_server/commands/query.py | 68 +++++++++++++++++++++++++++++++++++ solo_server/ensemble.yaml | 2 +- solo_server/main.py | 42 +++++++++++++++++++--- 3 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 solo_server/commands/query.py diff --git a/solo_server/commands/query.py b/solo_server/commands/query.py new file mode 100644 index 0000000..ab5a5e2 --- /dev/null +++ b/solo_server/commands/query.py @@ -0,0 +1,68 @@ +import sys +import typer +import requests +from litgpt import LLM +from rich.console import Console + +console = Console() + +CORE_SERVER_PORT = 5070 # Change this if your core server runs on a different port +CORE_SERVER_URL = f"http://localhost:{CORE_SERVER_PORT}/generate" + +def redirect_to_core_server(query: str, port: int = CORE_SERVER_PORT) -> None: + """ + Redirect the given query to the core server via an HTTP POST request. + """ + url = f"http://localhost:{port}/generate" + try: + response = requests.post(url, json={"prompt": query}) + response.raise_for_status() + console.print("[success]Response from core server:[/success]") + console.print(response.text) + except Exception as e: + console.print(f"[warning]Error redirecting to core server: {e}[/warning]") + +def query_llm(query: str) -> None: + """ + If the query exceeds 9000 characters, show an error. + Otherwise, load the model and generate a response. + """ + if len(query) > 9000: + typer.echo("Error: Your query exceeds the maximum allowed length of 9000 characters. 
It's over 9000!") + raise typer.Exit(1) + + # Load the model and generate a response while showing a spinner + llm = LLM.load("Qwen/Qwen2.5-1.5B-Instruct") + with console.status("Generating response...", spinner="dots"): + response = llm.generate(query) + typer.echo(response) + +def interactive_mode(): + console.print("Interactive Mode (type 'exit' or 'quit' to end):", style="bold green") + while True: + query_text = input(">> ").strip() + if query_text.lower() in ("exit", "quit"): + break + # If the query starts with "solo @@", redirect to the core server + if query_text.startswith("solo @@"): + # Remove the "solo @@" prefix before sending the query + core_query = query_text[len("solo @@"):].strip() + redirect_to_core_server(core_query) + else: + query_llm(query_text) + +if __name__ == "__main__": + # If invoked with "@@" as the first argument, treat the rest as the query. + # Otherwise, launch interactive mode. + if len(sys.argv) > 1 and sys.argv[1] == "@@": + if len(sys.argv) > 2: + query_text = " ".join(sys.argv[2:]).strip() + else: + typer.echo("Enter your query (end with EOF / Ctrl-D):") + query_text = sys.stdin.read().strip() + # If the query starts with "solo @@", remove that prefix. + if query_text.startswith("solo @@"): + query_text = query_text[len("solo @@"):].strip() + redirect_to_core_server(query_text) + else: + interactive_mode() diff --git a/solo_server/ensemble.yaml b/solo_server/ensemble.yaml index dc0979f..ded92a2 100644 --- a/solo_server/ensemble.yaml +++ b/solo_server/ensemble.yaml @@ -9,7 +9,7 @@ hardware: memory_gb: 16 max_new_tokens: 50 model_choice: null -module_pack: lean ensemble +module_pack: robotics port: 5070 precision: null quantize: null diff --git a/solo_server/main.py b/solo_server/main.py index 55e0405..05acc6a 100644 --- a/solo_server/main.py +++ b/solo_server/main.py @@ -1,6 +1,7 @@ import time import subprocess import socket +import sys import typer import click import yaml @@ -11,7 +12,11 @@ from rich.theme import Theme from rich import box -app = typer.Typer(help="Solo Server Setup CLI\nA polished CLI for hardware detection, model initialization, and advanced module loading.") +import commands.query + +app = typer.Typer( + help="Solo Server Setup CLI\nA polished CLI for hardware detection, model initialization, advanced module loading, and query redirection." +) # Google-inspired theme google_theme = Theme({ @@ -32,9 +37,9 @@ def print_banner(): """Display a header banner for the Solo Server CLI.""" banner_text = """ ___ _ __ __ _ - / _ \(_)___ ___ / /___ / /_(_) - / , _/ / _ \/ -_) / / __/ / __/ / - /_/|_/_/ .__/\__/ /_/\__/ \__/_/ + / _ \\(_)___ ___ / /___ / /_(_) + / , _/ / _ \\/ -_) / / __/ / __/ / + /_/|_/_/ .__/\\__/ /_/\\__/ \\__/_/ /_/ """ console.print(Panel(banner_text, style="header", border_style="panel.border", title="SOLO SERVER INIT", box=box.DOUBLE)) @@ -65,7 +70,9 @@ def simulate_model_download(model: str, sleep_time: int = 3) -> str: Simulate model download with a progress bar. (sleep_time is in seconds; e.g., 3 sec ~ 0.05 mins) """ - for _ in tqdm(range(sleep_time), desc="Downloading model (est. {:.2f} mins)".format(sleep_time/60), unit="sec", total=sleep_time): + for _ in tqdm(range(sleep_time), + desc="Downloading model (est. 
{:.2f} mins)".format(sleep_time/60), + unit="sec", total=sleep_time): time.sleep(1) return f"[success]Model {model} download complete.[/success]" @@ -308,5 +315,30 @@ def setup( server_message, used_port = serve_model(MODEL, port=START_PORT) console.print(server_message) +@app.command() +def query(query: str = typer.Argument( + None, + help="Query for the LLM. If omitted, interactive mode is launched." +)): + """ + Redirect queries to the appropriate functions in query.py. + If a query is provided, it is processed; otherwise, interactive mode is launched. + If the query starts with 'solo @@', the prefix is stripped and the core server is used. + """ + try: + from commands.query import query_llm, redirect_to_core_server, interactive_mode + except ModuleNotFoundError: + console.print("[warning]Module 'query' not found. Please ensure query.py is in the same directory.[/warning]") + raise typer.Exit(1) + + if query is None: + interactive_mode() + else: + if query.startswith("solo @@"): + core_query = query[len("solo @@"):].strip() + redirect_to_core_server(core_query) + else: + query_llm(query) + if __name__ == "__main__": app() From aefaacf00c8c0feea074fb1cf905472ec29acec7 Mon Sep 17 00:00:00 2001 From: ddiddi Date: Thu, 6 Mar 2025 20:32:00 -0800 Subject: [PATCH 10/10] add commands to consider for roadmap --- solo_server/advanced_cmd.py | 51 +++++++++++ solo_server/explorative_cmd.py | 155 +++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 solo_server/advanced_cmd.py create mode 100644 solo_server/explorative_cmd.py diff --git a/solo_server/advanced_cmd.py b/solo_server/advanced_cmd.py new file mode 100644 index 0000000..0ece57b --- /dev/null +++ b/solo_server/advanced_cmd.py @@ -0,0 +1,51 @@ +import typer +from rich.console import Console +from rich.panel import Panel + +app = typer.Typer(help="CLI for Advanced Model Operations and Model Export/Optimization") +console = Console() + +# ------------------------------- +# Advanced Model Operations Group +# ------------------------------- +advanced_app = typer.Typer(help="Commands for benchmarking, profiling, and stress testing your model.") +app.add_typer(advanced_app, name="advanced") + +@advanced_app.command("benchmark") +def benchmark(): + """Run performance benchmarks on the model.""" + console.print(Panel("Benchmark command executed", title="Benchmark", style="blue")) + +@advanced_app.command("profile") +def profile(): + """Profile model resource usage.""" + console.print(Panel("Profile command executed", title="Profile", style="blue")) + +@advanced_app.command("stress-test") +def stress_test(): + """Stress test the model and server under high-load conditions.""" + console.print(Panel("Stress-Test command executed", title="Stress Test", style="blue")) + +# ------------------------------- +# Model Export & Optimization Group +# ------------------------------- +optimization_app = typer.Typer(help="Commands for exporting, quantizing, and fine-tuning the model.") +app.add_typer(optimization_app, name="optimization") + +@optimization_app.command("export") +def export_model(): + """Export the model to various formats (e.g., ONNX, TensorRT, CoreML).""" + console.print(Panel("Export command executed", title="Export", style="green")) + +@optimization_app.command("quantize") +def quantize(): + """Apply quantization to reduce model size and improve efficiency.""" + console.print(Panel("Quantize command executed", title="Quantize", style="green")) + +@optimization_app.command("finetune") +def finetune(): + 
"""Fine-tune the model on custom datasets with specified hyperparameters.""" + console.print(Panel("Finetune command executed", title="Finetune", style="green")) + +if __name__ == "__main__": + app() diff --git a/solo_server/explorative_cmd.py b/solo_server/explorative_cmd.py new file mode 100644 index 0000000..93d5c29 --- /dev/null +++ b/solo_server/explorative_cmd.py @@ -0,0 +1,155 @@ +import typer +from rich.console import Console +from rich.panel import Panel + +app = typer.Typer(help="Solo CLI - A comprehensive tool for model management and server operations.") +console = Console() + +# --------------------------------- +# Setup Commands Group +# --------------------------------- +setup_app = typer.Typer(help="Commands for initializing and setting up the environment.") +app.add_typer(setup_app, name="setup") + +@setup_app.command("full") +def full_setup(): + """Run full server setup.""" + console.print(Panel("Full Setup executed", title="Setup", style="green")) + +@setup_app.command("init") +def init(): + """Reinitialize core components.""" + console.print(Panel("Init executed", title="Init", style="green")) + +# --------------------------------- +# Model Management Group +# --------------------------------- +model_app = typer.Typer(help="Manage model downloads, updates, and tests.") +app.add_typer(model_app, name="model") + +@model_app.command("download") +def download_model(): + """Download or update the model.""" + console.print(Panel("Download executed", title="Download", style="green")) + +@model_app.command("update") +def update_model(): + """Update the model to the latest version.""" + console.print(Panel("Update Model executed", title="Update Model", style="green")) + +@model_app.command("test") +def test_model(): + """Test the downloaded model with a sample prompt.""" + console.print(Panel("Test executed", title="Test", style="green")) + +# --------------------------------- +# Query & Interaction Group +# --------------------------------- +query_app = typer.Typer(help="Handle one-off queries or launch interactive mode.") +app.add_typer(query_app, name="query") + +@query_app.command("ask") +def ask(query: str = typer.Argument(..., help="Query for the model")): + """Send a query to the model.""" + # Check for "solo @@" prefix and adjust query if necessary + if query.startswith("solo @@"): + query = query[len("solo @@"):].strip() + console.print(Panel(f"Query: {query}", title="Query", style="green")) + +@query_app.command("interactive") +def interactive(): + """Launch interactive query mode.""" + console.print(Panel("Interactive mode launched", title="Interactive", style="green")) + # Add interactive loop logic here if desired + +# --------------------------------- +# Server Management Group +# --------------------------------- +server_app = typer.Typer(help="Commands for managing the model server.") +app.add_typer(server_app, name="server") + +@server_app.command("start") +def start_server(): + """Start or restart the model server.""" + console.print(Panel("Server started", title="Server", style="green")) + +@server_app.command("restart") +def restart_server(): + """Restart the server gracefully.""" + console.print(Panel("Server restarted", title="Restart", style="green")) + +@server_app.command("stop") +def stop_server(): + """Stop the running server.""" + console.print(Panel("Server stopped", title="Stop", style="green")) + +# --------------------------------- +# Diagnostics & Monitoring Group +# --------------------------------- +diag_app = typer.Typer(help="Commands for 
diagnostics and monitoring.") +app.add_typer(diag_app, name="diagnostics") + +@diag_app.command("status") +def status(): + """Display the current server status.""" + console.print(Panel("Status executed", title="Status", style="green")) + +@diag_app.command("logs") +def logs(): + """Display recent logs.""" + console.print(Panel("Logs executed", title="Logs", style="green")) + +@diag_app.command("health") +def healthcheck(): + """Perform a health check of the server.""" + console.print(Panel("Health check executed", title="Healthcheck", style="green")) + +@diag_app.command("diagnose") +def diagnose(): + """Run diagnostics to troubleshoot issues.""" + console.print(Panel("Diagnose executed", title="Diagnose", style="green")) + +# --------------------------------- +# Maintenance Group +# --------------------------------- +maint_app = typer.Typer(help="Maintenance and update commands.") +app.add_typer(maint_app, name="maintenance") + +@maint_app.command("update") +def update_cli(): + """Update the CLI or associated modules.""" + console.print(Panel("CLI Update executed", title="Update", style="green")) + +@maint_app.command("backup") +def backup(): + """Create backups of configuration and checkpoints.""" + console.print(Panel("Backup executed", title="Backup", style="green")) + +@maint_app.command("restore") +def restore(): + """Restore a backup configuration or model checkpoint.""" + console.print(Panel("Restore executed", title="Restore", style="green")) + +# --------------------------------- +# Configuration Group +# --------------------------------- +config_app = typer.Typer(help="View or modify configuration settings.") +app.add_typer(config_app, name="config") + +@config_app.command("set") +def set_config(): + """Set configuration parameters.""" + console.print(Panel("Config set executed", title="Config Set", style="green")) + +@config_app.command("info") +def config_info(): + """Display current configuration info.""" + console.print(Panel("Config info executed", title="Config Info", style="green")) + +@config_app.command("version") +def version(): + """Display the CLI version.""" + console.print(Panel("Version executed", title="Version", style="green")) + +if __name__ == "__main__": + app()
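
Reviewer note (not part of the patch series): the roadmap commands added in advanced_cmd.py and explorative_cmd.py only print Rich panels, so they can be smoke-tested without Docker, litgpt, or a running server. The sketch below shows one way to do that with Typer's CliRunner, e.g. under pytest; the import path solo_server.explorative_cmd and the test function name are assumptions about the package layout, not something the patches define.

# Hedged smoke test for the explorative CLI groups added in PATCH 10/10.
# Assumes solo_server is an importable package; adjust the import if the
# module is run as a standalone script instead.
from typer.testing import CliRunner

from solo_server.explorative_cmd import app

runner = CliRunner()

def test_explorative_groups_print_panels():
    # One representative subcommand per group; each should exit cleanly
    # and emit its status panel.
    for args in (
        ["setup", "full"],
        ["model", "download"],
        ["query", "ask", "solo @@ summarize the docs"],
        ["server", "start"],
        ["diagnostics", "status"],
        ["maintenance", "backup"],
        ["config", "version"],
    ):
        result = runner.invoke(app, args)
        assert result.exit_code == 0, result.output

Because "query ask" strips a leading "solo @@" before printing, the third invocation also exercises the same prefix convention used by the query command in main.py and commands/query.py.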