From 6d5cf01f2af40f732132a4a05919d1aa5625477d Mon Sep 17 00:00:00 2001
From: Kai Huang
Date: Mon, 29 Jun 2020 16:09:24 +0800
Subject: [PATCH 1/3] try wnd

---
 .../recommendation/Ml1mWideAndDeep.scala      | 33 ++++++++++---------
 .../zoo/models/recommendation/Utils.scala     |  2 +-
 .../models/recommendation/WideAndDeep.scala   |  2 +-
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala b/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala
index a6b78033aff..25fb3ab6421 100644
--- a/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala
+++ b/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala
@@ -17,10 +17,11 @@
 package com.intel.analytics.zoo.examples.recommendation
 
 import com.intel.analytics.bigdl.numeric.NumericFloat
-import com.intel.analytics.bigdl.optim.{Adam, Top1Accuracy}
+import com.intel.analytics.bigdl.optim.{Adam, PrecisionRecallAUC, Top1Accuracy}
 import com.intel.analytics.zoo.common.NNContext
 import com.intel.analytics.zoo.models.recommendation._
-import com.intel.analytics.zoo.pipeline.api.keras.objectives.SparseCategoricalCrossEntropy
+import com.intel.analytics.zoo.pipeline.api.keras.metrics.AUC
+import com.intel.analytics.zoo.pipeline.api.keras.objectives.{BinaryCrossEntropy, SparseCategoricalCrossEntropy}
 import org.apache.log4j.{Level, Logger}
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.functions._
@@ -46,19 +47,11 @@ object Ml1mWideAndDeep {
     val bucketSize = 100
     val localColumnInfo = ColumnFeatureInfo(
       wideBaseCols = Array("occupation", "gender"),
-      wideBaseDims = Array(21, 3),
-      wideCrossCols = Array("age-gender"),
-      wideCrossDims = Array(bucketSize),
-      indicatorCols = Array("genres", "gender"),
-      indicatorDims = Array(19, 3),
-      embedCols = Array("userId", "itemId"),
-      embedInDims = Array(userCount, itemCount),
-      embedOutDims = Array(64, 64),
-      continuousCols = Array("age"))
+      wideBaseDims = Array(21, 3))
 
     val wideAndDeep: WideAndDeep[Float] = WideAndDeep[Float](
       params.modelType,
-      numClasses = 5,
+      numClasses = 1,
       columnInfo = localColumnInfo)
 
     val isImplicit = false
@@ -75,8 +68,8 @@
       learningRateDecay = 1e-5)
 
     wideAndDeep.compile(optimizer = optimMethod,
-      loss = SparseCategoricalCrossEntropy[Float](zeroBasedLabel = false),
-      metrics = List(new Top1Accuracy[Float]())
+      loss = BinaryCrossEntropy[Float](),
+      metrics = List(new PrecisionRecallAUC[Float]())
     )
     wideAndDeep.fit(trainRdds, batchSize = params.batchSize,
       nbEpoch = params.maxEpoch, validationData = validationRdds)
@@ -162,7 +155,17 @@
       .select(unioned("userId"), unioned("itemId"), col("label"), col("gender"),
         col("age"), col("occupation"), col("genres"), col("age-gender"))
 
-    val rddOfSample = joined.rdd.map(r => {
+    // 3|261197|
+    // 5|226310|
+    val filtered = joined.filter(joined("label") === 3 || joined("label") === 5)
+
+    val relabel = (s: Int) => {
+      if (s == 3) 0 else 1
+    }
+    val relabelUDF = udf(relabel)
+    val relabeled = filtered.withColumn("label", relabelUDF(col("label")))
+
+    val rddOfSample = relabeled.rdd.map(r => {
       val uid = r.getAs[Int]("userId")
       val iid = r.getAs[Int]("itemId")
       UserItemFeature(uid, iid, Utils.row2Sample(r, columnInfo, modelType))
diff --git a/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/Utils.scala b/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/Utils.scala
index 0d8cf99c5af..beb8b0e0870 100644
--- a/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/Utils.scala
+++ b/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/Utils.scala
@@ -111,7 +111,7 @@ object Utils {
     val deepTensor: Array[Tensor[Float]] = getDeepTensors(r, columnInfo)
     val l = r.getAs[Int](columnInfo.label)
     val label = Tensor[Float](T(l))
-    label.resize(1, 1)
+//    label.resize(1, 1)
 
     modelType match {
       case "wide_n_deep" =>
diff --git a/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/WideAndDeep.scala b/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/WideAndDeep.scala
index c19c2577b05..bd18fadfa42 100644
--- a/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/WideAndDeep.scala
+++ b/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/WideAndDeep.scala
@@ -121,7 +121,7 @@
 
     modelType match {
       case "wide" =>
-        val out = Activation("softmax").inputs(wideLinear)
+        val out = Activation("sigmoid").inputs(wideLinear)
         val model: Model[T] = Model(Array(inputWide), out)
         model.asInstanceOf[AbstractModule[Tensor[T], Tensor[T], T]]
 

From fc8937e67a32d85f77d90f231ab1ab2c91ffdf0d Mon Sep 17 00:00:00 2001
From: Kai Huang
Date: Mon, 29 Jun 2020 16:39:55 +0800
Subject: [PATCH 2/3] update

---
 .../examples/recommendation/Ml1mWideAndDeep.scala | 13 +++++++++++--
 .../zoo/models/recommendation/WideAndDeep.scala   |  4 ++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala b/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala
index 25fb3ab6421..b324e9802b5 100644
--- a/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala
+++ b/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala
@@ -47,7 +47,15 @@ object Ml1mWideAndDeep {
     val bucketSize = 100
     val localColumnInfo = ColumnFeatureInfo(
       wideBaseCols = Array("occupation", "gender"),
-      wideBaseDims = Array(21, 3))
+      wideBaseDims = Array(21, 3),
+      wideCrossCols = Array("age-gender"),
+      wideCrossDims = Array(bucketSize),
+      indicatorCols = Array("genres", "gender"),
+      indicatorDims = Array(19, 3),
+      embedCols = Array("userId", "itemId"),
+      embedInDims = Array(userCount, itemCount),
+      embedOutDims = Array(64, 64),
+      continuousCols = Array("age"))
 
     val wideAndDeep: WideAndDeep[Float] = WideAndDeep[Float](
       params.modelType,
@@ -62,6 +70,7 @@ object Ml1mWideAndDeep {
       featureRdds.randomSplit(Array(0.8, 0.2))
     val trainRdds = trainpairFeatureRdds.map(x => x.sample)
     val validationRdds = validationpairFeatureRdds.map(x => x.sample)
+    println(validationRdds.count())
 
     val optimMethod = new Adam[Float](
       learningRate = 1e-2,
@@ -69,7 +78,7 @@
 
     wideAndDeep.compile(optimizer = optimMethod,
       loss = BinaryCrossEntropy[Float](),
-      metrics = List(new PrecisionRecallAUC[Float]())
+      metrics = List(new AUC[Float]())
     )
     wideAndDeep.fit(trainRdds, batchSize = params.batchSize,
       nbEpoch = params.maxEpoch, validationData = validationRdds)
diff --git a/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/WideAndDeep.scala b/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/WideAndDeep.scala
index bd18fadfa42..9c5393613c4 100644
--- a/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/WideAndDeep.scala
+++ b/zoo/src/main/scala/com/intel/analytics/zoo/models/recommendation/WideAndDeep.scala
@@ -128,14 +128,14 @@ class WideAndDeep[T: ClassTag](
       case "deep" =>
         val (inputDeep, mergeList) = deepMerge(inputInd, inputEmb, inputCon)
         val deepLinear = deepHidden(mergeList.toList)
-        val out = Activation("softmax").inputs(deepLinear)
+        val out = Activation("sigmoid").inputs(deepLinear)
         Model(inputDeep, out).asInstanceOf[AbstractModule[Tensor[T], Tensor[T], T]]
 
       case "wide_n_deep" =>
         val (inputDeep, mergeList) = deepMerge(inputInd, inputEmb, inputCon)
         val deepLinear = deepHidden(mergeList)
         val merged = Merge.merge(List(wideLinear, deepLinear), "sum")
-        val out = Activation("softmax").inputs(merged)
+        val out = Activation("sigmoid").inputs(merged)
         Model(Array(inputWide) ++ inputDeep, out)
           .asInstanceOf[AbstractModule[Tensor[T], Tensor[T], T]]
       case _ =>

From 22bc89356cebcf5985ac3cca5f24de6ace4074e0 Mon Sep 17 00:00:00 2001
From: Kai Huang
Date: Tue, 7 Jul 2020 17:09:07 +0800
Subject: [PATCH 3/3] fix auc

---
 .../zoo/examples/recommendation/Ml1mWideAndDeep.scala   | 2 +-
 .../analytics/zoo/pipeline/api/keras/metrics/AUC.scala  | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala b/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala
index b324e9802b5..d5ebc9bde42 100644
--- a/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala
+++ b/zoo/src/main/scala/com/intel/analytics/zoo/examples/recommendation/Ml1mWideAndDeep.scala
@@ -78,7 +78,7 @@ object Ml1mWideAndDeep {
 
     wideAndDeep.compile(optimizer = optimMethod,
       loss = BinaryCrossEntropy[Float](),
-      metrics = List(new AUC[Float]())
+      metrics = List(new AUC[Float](), new Top1Accuracy[Float]())
     )
     wideAndDeep.fit(trainRdds, batchSize = params.batchSize,
       nbEpoch = params.maxEpoch, validationData = validationRdds)
diff --git a/zoo/src/main/scala/com/intel/analytics/zoo/pipeline/api/keras/metrics/AUC.scala b/zoo/src/main/scala/com/intel/analytics/zoo/pipeline/api/keras/metrics/AUC.scala
index c3cc0e62e9b..61d61ebcfb3 100644
--- a/zoo/src/main/scala/com/intel/analytics/zoo/pipeline/api/keras/metrics/AUC.scala
+++ b/zoo/src/main/scala/com/intel/analytics/zoo/pipeline/api/keras/metrics/AUC.scala
@@ -131,15 +131,15 @@ class AUC[T](thresholdNum: Int = 200)(implicit ev: TensorNumeric[T])
 
   override def apply(output: Activity, target: Activity): ValidationResult = {
     val _output = if (output.asInstanceOf[Tensor[T]].dim() == 2) {
-      output.asInstanceOf[Tensor[T]].squeeze(2)
+      output.asInstanceOf[Tensor[T]].clone().squeeze(2)
     } else {
-      output.asInstanceOf[Tensor[T]].squeeze()
+      output.asInstanceOf[Tensor[T]].clone().squeeze()
     }
 
     val _target = if (target.asInstanceOf[Tensor[T]].dim() == 2) {
-      target.asInstanceOf[Tensor[T]].squeeze(2)
+      target.asInstanceOf[Tensor[T]].clone().squeeze(2)
     } else {
-      target.asInstanceOf[Tensor[T]].squeeze()
+      target.asInstanceOf[Tensor[T]].clone().squeeze()
     }
     require(_output.dim() <= 2 && _target.dim() <= 2,
       s"${_output.dim()} dim format is not supported")