diff --git a/README.md b/README.md index 3302854..682ff56 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ Converts [Sefaria-Export](https://github.com/Sefaria/Sefaria-Export) to SQLite d ## How to use 1. git clone https://github.com/Sefaria/Sefaria-SQL.git 2. git clone https://github.com/Sefaria/Sefaria-Export.git (into the same dir that Sefaria-SQL is in) -3. Go to scripts/links and run: pytyhon2 createLinks.py -4. Go to scripts/fileList and run: pytyhon2 createFileList.py -5. (Not really needed b/c headers are part of clone) go to Sefaria-SQL/scripts/headers and run: pytyhon2 createHeaders.py +3. Go to scripts/links and run: python2 createLinks.py +4. Go to scripts/fileList and run: python2 createFileList.py +5. (Not really needed b/c headers are part of clone) go to Sefaria-SQL/scripts/headers and run: python2 createHeaders.py 6. Open Sefaria-SQL in [Eclipse](http://www.eclipse.org/downloads/) for Java (File -> import -> Existing Projects into Workspace) 7. In src/SQLite.java, you can change variables 8. Run project @@ -18,9 +18,9 @@ Converts [Sefaria-Export](https://github.com/Sefaria/Sefaria-Export) to SQLite d The java code is in src/ -SQLite.java is the highest level code (it run at startup). Book.java contains methods for inputing the data about each book into the database. Simularly, Header, Link, Searching, and Text are responsible for putting their respective items into the database (in their own table). Node.java is responsible for putting in Nodes for complex texts and/or alternate structures. +SQLite.java is the highest level code (it runs at startup). Book.java contains methods for inputting the data about each book into the database. Similarly, Header, Link, Searching, and Text are responsible for putting their respective items into the database (in their own table). Node.java is responsible for putting in Nodes for complex texts and/or alternate structures. 
-There are some preprocessing python srcipts in scripts/ +There are some preprocessing python scripts in scripts/ scripts/fileList/createFileList.py creates a list of files to be upload based on the index and exported files. diff --git a/preDatabaseUpload.py b/preDatabaseUpload.py index a38d710..1dedca7 100755 --- a/preDatabaseUpload.py +++ b/preDatabaseUpload.py @@ -50,7 +50,7 @@ def links(): thisLine += convert2Levels(row[0].replace(row[3] + " " ,"").split(':'), row[5], row[0]) thisLine += [row[4]] thisLine += convert2Levels(row[1].replace(row[4] + " " ,"").split(':'), row[6], row[1]) - thisLine += [conncetionType(row[2])] + thisLine += [connectionType(row[2])] writer.writerow(thisLine) numberOfLines += 1 in_file_number += 1 @@ -81,7 +81,7 @@ def daf2Num(daf): return value; -def conncetionType(connString): +def connectionType(connString): return connString[0:3].lower() #map = {'quotation': 1, 'commentary':2, 'reference':3,'related':4,'midrash':5, 'allusion':6, 'mesorat hashas': 7, 'summary':8, 'Law':9,'ein mishpat':10,'Liturgy':11,'explication':12, 'targum':13,'Ellucidation':14}; #if connString not in map.keys(): @@ -134,16 +134,16 @@ def createFileList(): def reorderFiles(unordered): - #note: this only works for the list of Commentaries that serperated from the rest of the list. + #note: this only works for the list of Commentaries that separated from the rest of the list. 
# so it doesn't include Other/Commentary2 specials = [ '^Tanakh/Torah/','^Tanakh/Prophets/', '^Tanakh/Writings/', '^Mishnah/Seder ', '^Talmud/Bavli/Seder ', '^Talmud/Yerushalmi/Seder ', '^Tanakh/Commentary/Rashi', '^Tanakh/Targum/Onkelos', '^Tanakh/Commentary/Ibn Ezra', '^Tanakh/Commentary/Ramban', '^Tanakh/Commentary/Sforno', '^Tanakh/Commentary/Rashbam', - '^Tanakh/Commentary', '^Tanakh/Commentary/', '^Other/Commentary2/Tanakh/', '^Tanakh/Targum/', # make sure that the rest of the Tanakh commentaries come b/f any other category commenary + '^Tanakh/Commentary', '^Tanakh/Commentary/', '^Other/Commentary2/Tanakh/', '^Tanakh/Targum/', # make sure that the rest of the Tanakh commentaries come b/f any other category commentary - '^Mishnah/Commentary/Bartenura', '^Mishnah/Commentary/Ikar Tosafot Yom Tov', '/Mishnah/Tosafot Yom Tov', '^Mishnah/Commentary/', '^Other/Commentary2/Mishnah/', # make sure that the rest of the mishna commentaries come b/f any other category commenary + '^Mishnah/Commentary/Bartenura', '^Mishnah/Commentary/Ikar Tosafot Yom Tov', '/Mishnah/Tosafot Yom Tov', '^Mishnah/Commentary/', '^Other/Commentary2/Mishnah/', # make sure that the rest of the mishna commentaries come b/f any other category commentary '^Talmud/Commentary/Rashi', '^Talmud/Commentary/Tosafot', '/Talmud/Rashba', '^Talmud/Rif/', '^Talmud/Commentary/', '^Other/Commentary2/Talmud/', diff --git a/scripts/headers/createHeaders.py b/scripts/headers/createHeaders.py index 44ba084..01b3a9a 100755 --- a/scripts/headers/createHeaders.py +++ b/scripts/headers/createHeaders.py @@ -56,7 +56,7 @@ def int2heb(num): heb = "" + hChar1 + heb; place+=1; #now search for 15 & 16 to replace - ka = "�" + "�"; #carefull...don't join these strings + ka = "�" + "�"; #careful...don't join these strings ku = "�" + "�"; heb = heb.replace(ka,"��"); heb = heb.replace(ku,"��"); @@ -214,4 +214,4 @@ def main(): addHeaders("Yismach Yisrael" + " on Pesach Haggadah",3, hagadahSectionsEn, hagadahSectionsHe, 0, 0) if 
__name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/Book.java b/src/Book.java index caa750e..8e65e11 100755 --- a/src/Book.java +++ b/src/Book.java @@ -229,7 +229,7 @@ public static void addBook(Connection c, JSONObject enJSON, JSONObject heJSON, b } textDepth = str2strArray(sectionNames).length; if(str2strArray(heSectionNames).length != textDepth){ - System.err.println("section names convertion problem:" + heSectionNames); + System.err.println("section names conversion problem:" + heSectionNames); System.exit(-1); } diff --git a/src/CSVReader.java b/src/CSVReader.java index 94c37a5..30a6e71 100755 --- a/src/CSVReader.java +++ b/src/CSVReader.java @@ -149,8 +149,8 @@ private String[] parseLine(String nextLine) throws IOException { }else{ inQuotes = !inQuotes; // the tricky case of an embedded quote in the middle: a,bc"d"ef,g - if(i>2 //not on the begining of the line - && nextLine.charAt(i-1) != this.separator //not at the begining of an escape sequence + if(i>2 //not on the beginning of the line + && nextLine.charAt(i-1) != this.separator //not at the beginning of an escape sequence && nextLine.length()>(i+1) && nextLine.charAt(i+1) != this.separator //not at the end of an escape sequence ){ diff --git a/src/Header.java b/src/Header.java index 0f5e27b..0993893 100755 --- a/src/Header.java +++ b/src/Header.java @@ -13,7 +13,7 @@ public class Header extends SQLite { static int headersFailed = 0; - public static final String CREATE_HEADES_TABLE = "CREATE TABLE " + TABLE_HEADERS + "(\r\n" + + public static final String CREATE_HEADERS_TABLE = "CREATE TABLE " + TABLE_HEADERS + "(\r\n" + " _id INTEGER PRIMARY KEY,\r\n" + " bid INTEGER,\r\n" + " heHeader TEXT,\r\n" + diff --git a/src/Huffman.java b/src/Huffman.java index fd65b75..5367ed2 100755 --- a/src/Huffman.java +++ b/src/Huffman.java @@ -109,8 +109,8 @@ public static void addAllTexts(Connection c){ String deflated = Huffman.getDeflatedTree(); System.out.println((new 
Date()).getTime() + "finished deflating"); System.out.println("deflated size:"+ Huffman.utf8Length(deflated)); - huffmanRoot = Huffman.enflateTree(deflated); - System.out.println((new Date()).getTime() + "finished enflating"); + huffmanRoot = Huffman.inflateTree(deflated); + System.out.println((new Date()).getTime() + "finished inflating"); if(testStr.equals(decode(compressedTest))){ System.out.println("Good: decoding"); }else @@ -337,7 +337,7 @@ public static void copyNewDB(String oldDB, String newDB, Searching.SEARCH_METHOD /* * copyTable(c, "Texts", Text.CREATE_TEXTS_TABLE, newDB); - * copyTable(c, "Headers", Header.CREATE_HEADES_TABLE, newDB); + * copyTable(c, "Headers", Header.CREATE_HEADERS_TABLE, newDB); * copyTable(c, "Links", Link.CREATE_TABLE_LINKS, newDB); */ copyTable(c, "android_metadata", CREATE_TABLE_METADATA, newDB); @@ -345,7 +345,7 @@ public static void copyNewDB(String oldDB, String newDB, Searching.SEARCH_METHOD //copyTable(c, "Searching", Searching.CREATE_SEARCH, newDB); Searching.makeSearching(searchMethod, c, oldDB, newDB); - setSettings("version", DB_VERION_NUM +"", c); + setSettings("version", DB_VERSION_NUM +"", c); copyTextTable(c, oldDB); c.close(); @@ -378,7 +378,7 @@ public static void copyNewHeTextOnlyDB(String oldDB, String newDB, Searching.SEA c.prepareStatement("INSERT INTO heTexts (" + columns + ") SELECT " + "heTextCompress" + " FROM oldDB.Texts").execute(); Searching.makeSearching(searchMethod, c, oldDB, newDB); - setSettings("version", DB_VERION_NUM +"", c); + setSettings("version", DB_VERSION_NUM +"", c); c.close(); @@ -402,7 +402,7 @@ public static void copyNewAPIDB(String oldDB, String newDB){ copyTable(c, "android_metadata", CREATE_TABLE_METADATA, newDB); setSettings("api", ""+1, c); - setSettings("version", DB_VERION_NUM +"", c); + setSettings("version", DB_VERSION_NUM +"", c); c.close(); } catch (SQLException e) { e.printStackTrace(); @@ -512,14 +512,14 @@ public static void test(){ makeTree(); List encodedText = 
encode(text); System.out.println(decode(encodedText)); - String defalted = getDeflatedTree(); - System.out.println(defalted); - huffmanRoot = enflateTree(defalted); + String deflated = getDeflatedTree(); + System.out.println(deflated); + huffmanRoot = inflateTree(deflated); //printTree(huffmanRoot, ""); if(!text.equals(decode(encodedText))) - System.err.println("problem with defalted thing"); + System.err.println("problem with deflated thing"); else{ System.out.println("\nGood Work!!\n" + decode(encodedText)); } @@ -556,7 +556,7 @@ else if(node.leftChild.plainText == null && cameFrom != node.leftChild){ } } - public static Huffman enflateTree(String deflated){ + public static Huffman inflateTree(String deflated){ Date date = new Date(); long startTime = date.getTime(); Huffman root = new Huffman(); @@ -594,7 +594,7 @@ public static Huffman enflateTree(String deflated){ node = tempNode; } } - System.out.println("enflation took:" + ((new Date()).getTime() - startTime)/1000.0); + System.out.println("inflation took:" + ((new Date()).getTime() - startTime)/1000.0); return root; } diff --git a/src/Link.java b/src/Link.java index d775fc8..ec78e1f 100755 --- a/src/Link.java +++ b/src/Link.java @@ -69,7 +69,7 @@ static String getTitleFromComplex(String fullPath){ } public static Node.NodePair getParentID(String title, String fullPath, int bid){ - if(title.equals(fullPath)){ //it's referencing the book directly... A good example of this is when there's a default structre that it's referencing (or anything without subnodes) + if(title.equals(fullPath)){ //it's referencing the book directly... 
A good example of this is when there's a default structure that it's referencing (or anything without subnodes) Node.NodePair nodePair = null; if(booksIsComplex.get(bid)){ nodePair = allDefaultNodesByBID.get(bid); @@ -191,7 +191,7 @@ static void addLinkFile(Connection c, CSVReader reader){ } /** - * repositions the row so that it will be consistant (ignoring textdepth) when trying to get the values at each level. + * repositions the row so that it will be consistent (ignoring textdepth) when trying to get the values at each level. * @param row * @param bida * @param textDeptha @@ -207,7 +207,7 @@ static void addLinkFile(Connection c, CSVReader reader){ row[i - 1] = row[i]; } row[startingNum] = "0"; - //Log.d("sql_link_values", "preforming fix row[x] A-" + booka.title + " " + whileLoopC++); + //Log.d("sql_link_values", "performing fix row[x] A-" + booka.title + " " + whileLoopC++); } startingNum = 13; @@ -217,13 +217,13 @@ static void addLinkFile(Connection c, CSVReader reader){ row[i - 1] = row[i]; } row[startingNum] = "0"; - //Log.d("sql_link_values", "preforming fix row[x] B-" + bookb.title+ " " + whileLoopC++ ); + //Log.d("sql_link_values", "performing fix row[x] B-" + bookb.title+ " " + whileLoopC++ ); } return row; } private static PreparedStatement putValues(PreparedStatement stmt, String [] row, int bida, int bidb, boolean addConnType) throws NumberFormatException, SQLException{ - //row shuold already be repositioned + //row should already be repositioned stmt.setInt(1, bida); stmt.setInt(2, catchDafs(row[6])); diff --git a/src/Node.java b/src/Node.java index 5ce9de2..7c1f677 100755 --- a/src/Node.java +++ b/src/Node.java @@ -55,9 +55,9 @@ public class Node extends SQLite{ " structNum INTEGER NOT NULL default 1,\r\n" + " textDepth INTEGER,\r\n" + - " startTid INTEGER,\r\n" + //maybe only used with refferences on alt structure - " endTid INTEGER,\r\n" + //maybe only used with refferences on alt structure - " extraTids TEXT,\r\n" + //maybe only used with 
refferences on alt structure ex. "[34-70,98-200]" + " startTid INTEGER,\r\n" + //maybe only used with references on alt structure + " endTid INTEGER,\r\n" + //maybe only used with references on alt structure + " extraTids TEXT,\r\n" + //maybe only used with references on alt structure ex. "[34-70,98-200]" " startLevels TEXT,\r\n" + // " key TEXT,\r\n" + // //maybe some stuff like to display chap name and or number (ei. maybe add some displaying info) @@ -763,7 +763,7 @@ protected static int addWholeSchemas(Connection c, JSONObject schemas) throws JS try{ JSONObject alts = schemas.getJSONObject("alts"); String bookTitle = schemas.getString("title"); - String default_struct = "__UNUSED__"; //didn't leave it blank in case 2 sturcts both have no name and I push them together. + String default_struct = "__UNUSED__"; //didn't leave it blank in case 2 structs both have no name and I push them together. try{ default_struct = schemas.getString("default_struct"); }catch(Exception e){ diff --git a/src/SQLite.java b/src/SQLite.java index aff57d3..f37d691 100755 --- a/src/SQLite.java +++ b/src/SQLite.java @@ -23,11 +23,11 @@ public class SQLite { - protected static final int DB_VERION_NUM = 277; - public static final String DB_NAME_PART = "test" + DB_VERION_NUM; + protected static final int DB_VERSION_NUM = 277; + public static final String DB_NAME_PART = "test" + DB_VERSION_NUM; public static final String DB_NAME_FULL = "testDBs/" + DB_NAME_PART + ".db"; public static final String DB_NAME_COPY = "testDBs/UpdateForSefariaMobileDatabase.db";//copy_" + DB_NAME_PART + ".db"; - public static final String DB_NAME_HE_ONLY_COPY = "testDBs/heTexts_" + DB_VERION_NUM + ".db"; + public static final String DB_NAME_HE_ONLY_COPY = "testDBs/heTexts_" + DB_VERSION_NUM + ".db"; public static final String DB_NAME_API = "testDBs/API_UpdateForSefariaMobileDatabase.db"; private static final int OLD_DB_NUM_TO_COPY_FROM = 265; @@ -158,7 +158,7 @@ public static void createTables(){ 
stmt.executeUpdate(Node.CREATE_NODE_TABLE); stmt.executeUpdate(Searching.CREATE_SEARCH); stmt.executeUpdate(Book.CREATE_BOOKS_TABLE); - stmt.executeUpdate(Header.CREATE_HEADES_TABLE); + stmt.executeUpdate(Header.CREATE_HEADERS_TABLE); stmt.executeUpdate(CREATE_TABLE_SETTINGS); @@ -169,7 +169,7 @@ public static void createTables(){ stmt.executeUpdate(" INSERT INTO \"android_metadata\" VALUES ('en_US')"); stmt.close(); - setSettings("version", ""+DB_VERION_NUM, c); + setSettings("version", ""+DB_VERSION_NUM, c); setSettings("api", ""+0, c); System.out.println("Created tables"); } catch ( Exception e ) { @@ -407,7 +407,7 @@ static int returnLangNums(String langString){ return LANG_EN; else if(langString.equals("he")) return LANG_HE; - System.err.println("unrecignized lang:" + langString); + System.err.println("unrecognized lang:" + langString); return 0; } diff --git a/src/Searching.java b/src/Searching.java index a700f49..f381fcb 100755 --- a/src/Searching.java +++ b/src/Searching.java @@ -90,7 +90,7 @@ public static void makeFreshIndex(Connection newDBConnection, String oldDB, SEAR private static String [] getWords(String text){ - //String orgTetx = ""+ text; + //String orgText = ""+ text; text = text.replaceAll("[\u05be]", " "); text = text.replaceAll("[\u0591-\u05C7\u05f3\u05f4\'\"]", ""); text = text.replaceAll("([^\u05d0-\u05ea])", " "); @@ -222,7 +222,7 @@ private static byte[] toJHpackets(BitSet bits) { ArrayList blob = new ArrayList(); int packetCount = bits.length()/BITS_PER_PACKET + 1; if(packetCount > 255){ - System.err.println("TO BIG PACKET COUNT... MUST CHANGE TO 8 BYTE packets...packetCouunt: " + packetCount); + System.err.println("TO BIG PACKET COUNT... MUST CHANGE TO 8 BYTE packets...packetCount: " + packetCount); System.exit(-1); } @@ -240,7 +240,7 @@ private static byte[] toJHpackets(BitSet bits) { packBits.set(j); } }catch(Exception e){ - System.err.println("bitnum: " + bitNum + ".. 
bits.length:" + bits.length() + " (i,j): " + i + "," + j + "...packetCouunt: " + packetCount); + System.err.println("bitnum: " + bitNum + ".. bits.length:" + bits.length() + " (i,j): " + i + "," + j + "...packetCount: " + packetCount); break; } } diff --git a/src/Text.java b/src/Text.java index fc3da4e..5f1a5e4 100755 --- a/src/Text.java +++ b/src/Text.java @@ -310,14 +310,14 @@ protected static int insertValues(Connection c, String title,int textDepth, int //theText = convertToJH8(theText);////CONVERT FROM UTF8!!!!!!! //convertFromJH8(theText); - }catch(Exception e){ //if there was a problem getting the text, then it probably wasn't text anyways so just leave the function. + }catch(Exception e){ //if there was a problem getting the text, then it probably wasn't text anyway so just leave the function. System.err.println("Error: " + e); System.err.println("sql_adding_text: Problem adding text " + title + " it[1] = " + it[1]); textsFailedToUpload++; return -1; } - //Huffman.addTextCount(theText);//commented out in order to make the copying a seperated task + //Huffman.addTextCount(theText);//commented out in order to make the copying a separated task PreparedStatement stmt = null; try{ stmt = c.prepareStatement("INSERT INTO Texts (" diff --git a/src/org/json/JSONArray.java b/src/org/json/JSONArray.java index 3f05548..7d11619 100755 --- a/src/org/json/JSONArray.java +++ b/src/org/json/JSONArray.java @@ -547,7 +547,7 @@ public long optLong(int index, long defaultValue) { /** * Get the optional string value associated with an index. It returns an * empty string if there is no value at that index. If the value is not a - * string and is not null, then it is coverted to a string. + * string and is not null, then it is converted to a string. * * @param index * The index must be between 0 and length() - 1. @@ -934,7 +934,7 @@ public Writer write(Writer writer) throws JSONException { * @param indentFactor * The number of spaces to add to each level of indentation. 
* @param indent - * The indention of the top level. + * The indentation of the top level. * @return The writer. * @throws JSONException */