From 138bb519975407d4ea0dc1478d897d451ef05dab Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Tue, 4 Apr 2023 17:15:37 -0700 Subject: [PATCH] [cr-mixer/home-mixer] Remove `getLinearRankingParams` in EarlybirdTensorflowBasedSimilarityEngine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused ranking params which are specified by services when making an Earlybird relevance search. For cr-mixer: since we always set useTensorflowRanking = true in EarlybirdSimilarityEngineRouter, we will only ever use the TensorflowBasedScoringFunction for ranking search results. That function doesn't rely on any of the linear params specified in getLinearRankingParams, nor on the boosts, because we set applyBoosts = false in the request. These parameters are therefore strictly redundant. The parameters in home-mixer can be removed for essentially the same reason—they are redundant given that we use the Tensorflow scoring function and don't apply boosts. 
--- ...ybirdTensorflowBasedSimilarityEngine.scala | 43 +++---------------- .../util/earlybird/RelevanceSearchUtil.scala | 22 ---------- 2 files changed, 5 insertions(+), 60 deletions(-) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdTensorflowBasedSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdTensorflowBasedSimilarityEngine.scala index dd29a067b..8df6ec711 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdTensorflowBasedSimilarityEngine.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdTensorflowBasedSimilarityEngine.scala @@ -6,8 +6,6 @@ import com.twitter.search.earlybird.thriftscala.EarlybirdService import com.twitter.search.earlybird.thriftscala.ThriftSearchQuery import com.twitter.util.Time import com.twitter.search.common.query.thriftjava.thriftscala.CollectorParams -import com.twitter.search.common.ranking.thriftscala.ThriftAgeDecayRankingParams -import com.twitter.search.common.ranking.thriftscala.ThriftLinearFeatureRankingParams import com.twitter.search.common.ranking.thriftscala.ThriftRankingParams import com.twitter.search.common.ranking.thriftscala.ThriftScoringFunctionType import com.twitter.search.earlybird.thriftscala.ThriftSearchRelevanceOptions @@ -97,7 +95,7 @@ object EarlybirdTensorflowBasedSimilarityEngine { // Whether to collect conversation IDs. Remove it for now. 
// collectConversationId = Gate.True(), // true for Home rankingMode = ThriftSearchRankingMode.Relevance, - relevanceOptions = Some(getRelevanceOptions(query.useTensorflowRanking)), + relevanceOptions = Some(getRelevanceOptions), collectorParams = Some( CollectorParams( // numResultsToReturn defines how many results each EB shard will return to search root @@ -116,13 +114,11 @@ object EarlybirdTensorflowBasedSimilarityEngine { // The specific values of recap relevance/reranking options correspond to // experiment: enable_recap_reranking_2988,timeline_internal_disable_recap_filter // bucket : enable_rerank,disable_filter - private def getRelevanceOptions(useTensorflowRanking: Boolean): ThriftSearchRelevanceOptions = { + private def getRelevanceOptions: ThriftSearchRelevanceOptions = { ThriftSearchRelevanceOptions( proximityScoring = true, maxConsecutiveSameUser = Some(2), - rankingParams = - if (useTensorflowRanking) Some(getTensorflowBasedRankingParams) - else Some(getLinearRankingParams), + rankingParams = Some(getTensorflowBasedRankingParams), maxHitsToProcess = Some(500), maxUserBlendCount = Some(3), proximityPhraseWeight = 9.0, @@ -131,41 +127,12 @@ object EarlybirdTensorflowBasedSimilarityEngine { } private def getTensorflowBasedRankingParams: ThriftRankingParams = { - getLinearRankingParams.copy( + ThriftRankingParams( `type` = Some(ThriftScoringFunctionType.TensorflowBased), selectedTensorflowModel = Some("timelines_rectweet_replica"), + minScore = -1.0e100, applyBoosts = false, authorSpecificScoreAdjustments = None ) } - - private def getLinearRankingParams: ThriftRankingParams = { - ThriftRankingParams( - `type` = Some(ThriftScoringFunctionType.Linear), - minScore = -1.0e100, - retweetCountParams = Some(ThriftLinearFeatureRankingParams(weight = 20.0)), - replyCountParams = Some(ThriftLinearFeatureRankingParams(weight = 1.0)), - reputationParams = Some(ThriftLinearFeatureRankingParams(weight = 0.2)), - luceneScoreParams = 
Some(ThriftLinearFeatureRankingParams(weight = 2.0)), - textScoreParams = Some(ThriftLinearFeatureRankingParams(weight = 0.18)), - urlParams = Some(ThriftLinearFeatureRankingParams(weight = 2.0)), - isReplyParams = Some(ThriftLinearFeatureRankingParams(weight = 1.0)), - favCountParams = Some(ThriftLinearFeatureRankingParams(weight = 30.0)), - langEnglishUIBoost = 0.5, - langEnglishTweetBoost = 0.2, - langDefaultBoost = 0.02, - unknownLanguageBoost = 0.05, - offensiveBoost = 0.1, - inTrustedCircleBoost = 3.0, - multipleHashtagsOrTrendsBoost = 0.6, - inDirectFollowBoost = 4.0, - tweetHasTrendBoost = 1.1, - selfTweetBoost = 2.0, - tweetHasImageUrlBoost = 2.0, - tweetHasVideoUrlBoost = 2.0, - useUserLanguageInfo = true, - ageDecayParams = Some(ThriftAgeDecayRankingParams(slope = 0.005, base = 1.0)) - ) - } - } diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/RelevanceSearchUtil.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/RelevanceSearchUtil.scala index 30be20d60..0de4546a6 100644 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/RelevanceSearchUtil.scala +++ b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/RelevanceSearchUtil.scala @@ -15,28 +15,6 @@ object RelevanceSearchUtil { `type` = Some(scr.ThriftScoringFunctionType.TensorflowBased), selectedTensorflowModel = Some("timelines_rectweet_replica"), minScore = -1.0e100, - retweetCountParams = Some(scr.ThriftLinearFeatureRankingParams(weight = 20.0)), - replyCountParams = Some(scr.ThriftLinearFeatureRankingParams(weight = 1.0)), - reputationParams = Some(scr.ThriftLinearFeatureRankingParams(weight = 0.2)), - luceneScoreParams = Some(scr.ThriftLinearFeatureRankingParams(weight = 2.0)), - textScoreParams = Some(scr.ThriftLinearFeatureRankingParams(weight = 0.18)), - urlParams = Some(scr.ThriftLinearFeatureRankingParams(weight = 2.0)), - isReplyParams = Some(scr.ThriftLinearFeatureRankingParams(weight = 
1.0)), - favCountParams = Some(scr.ThriftLinearFeatureRankingParams(weight = 30.0)), - langEnglishUIBoost = 0.5, - langEnglishTweetBoost = 0.2, - langDefaultBoost = 0.02, - unknownLanguageBoost = 0.05, - offensiveBoost = 0.1, - inTrustedCircleBoost = 3.0, - multipleHashtagsOrTrendsBoost = 0.6, - inDirectFollowBoost = 4.0, - tweetHasTrendBoost = 1.1, - selfTweetBoost = 2.0, - tweetHasImageUrlBoost = 2.0, - tweetHasVideoUrlBoost = 2.0, - useUserLanguageInfo = true, - ageDecayParams = Some(scr.ThriftAgeDecayRankingParams(slope = 0.005, base = 1.0)), selectedModels = Some(Map("home_mixer_unified_engagement_prod" -> 1.0)), applyBoosts = false, )