@@ -70,8 +70,11 @@ def onehot(i,original_dim):
7070# summary:
7171#
7272# let N be original space, M be embedding space
73- # H maps k-hot vectors to CM embeddings:
73+ # H (h in code) maps k-hot vectors to CM embeddings:
7474# for all i, H[i,j_d]=1 for D distinct hashes of i, { j_1, ..., j_D }
75+ # i.e., the J-th column of H indicates which indices [in the original N space] get hashed to index J in the c-m space
76+ #
77+ # m is an N-by-N matrix, intended to encode a relation p(X,Y)
7578#
7679# 1) to embed a one-hot vector v, compute ev = vH
7780# 2) to embed a matrix M mapping i to i' in the original space,
@@ -82,9 +85,9 @@ def onehot(i,original_dim):
8285# 4) to estimate (vM)[i,i1] from w = (ev eM), look at
8386# min{ w[ w >= (u_i1)H ] } ---I think, not tested
8487
85- def run_main ():
86- original_dim = 9
87- embedded_dim = 10
88+ def run_main1 ():
89+ original_dim = 10
90+ embedded_dim = 5
8891
8992 #hash_salt = [hash("william"),hash("cohen"),hash("rubber duckie")]
9093 hash_salt = [hash ("william" ),hash ("cohen" )]
@@ -96,7 +99,6 @@ def run_main():
9699 show ('mh' ,mh ,code = 'embedded' ,h = h )
97100 #this isn't quite right since you need to allow for possibility
98101 #of hash collisions in h
99- #hTbyD = h.transpose()*(1.0/len(hash_salt))
100102 oneByD = np .reciprocal (h .sum (1 ))
101103 hTbyD = h .transpose ()* oneByD
102104 show ('h^T/D' ,hTbyD )
@@ -130,4 +132,14 @@ def check_results(i):
130132 print 'tot_collisions' ,tot_collisions ,'tot' ,tot
131133
132134if __name__ == "__main__" :
133- run_main ()
135+ original_dim = 10
136+ embedded_dim = 5
137+ hash_salt = [hash ("william" ),hash ("cohen" )]
138+ H = embedder_matrix (original_dim ,embedded_dim ,hash_salt )
139+ x = onehot (7 ,original_dim )
140+ ex = np .dot (x ,H )
141+ print 'x' ,x
142+ print 'ex' ,ex
143+
144+
145+
0 commit comments