Open-sourcing Tweetypie

Tweetypie is the core Tweet service that handles the reading and writing of Tweet data.
2024-12-18 02:41:37 +03:00 · 2023-05-12 09:11:38 -07:00 · 2023-05-12 09:11:38 -07:00 · 01dbfee4c0
commit 01dbfee4c0
parent 90d7ea370e
591 changed files with 68352 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -24,6 +24,7 @@ Product surfaces at Twitter are built on a shared set of data, models, and softw
 |                    | [timelines-aggregation-framework](timelines/data_processing/ml_util/aggregation_framework/README.md) | Framework for generating aggregate features in batch or real time. |
 |                    | [representation-manager](representation-manager/README.md) | Service to retrieve embeddings (i.e. SimClusers and TwHIN). |
 |                    | [twml](twml/README.md) | Legacy machine learning framework built on TensorFlow v1. |
 |                    | [Tweetypie](tweetypie/server/README.md) | Core Tweet service that handles the reading and writing of Tweet data. |
 The product surface currently included in this repository is the For You Timeline.
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala
@ -0,0 +1,118 @@
 package com.twitter.tweetypie.additionalfields
 import com.twitter.tweetypie.thriftscala.Tweet
 import com.twitter.scrooge.TFieldBlob
 import com.twitter.scrooge.ThriftStructField
 object AdditionalFields {
  type FieldId = Short
  /** additional fields really start at 100, be we are ignoring conversation id for now */
  val StartAdditionalId = 101
  /** all known [[Tweet]] field IDs */
  val CompiledFieldIds: Seq[FieldId] = Tweet.metaData.fields.map(_.id)
  /** all known [[Tweet]] fields in the "additional-field" range (excludes id) */
  val CompiledAdditionalFieldMetaDatas: Seq[ThriftStructField[Tweet]] =
    Tweet.metaData.fields.filter(f => isAdditionalFieldId(f.id))
  val CompiledAdditionalFieldsMap: Map[Short, ThriftStructField[Tweet]] =
    CompiledAdditionalFieldMetaDatas.map(field => (field.id, field)).toMap
  /** all known [[Tweet]] field IDs in the "additional-field" range */
  val CompiledAdditionalFieldIds: Seq[FieldId] =
    CompiledAdditionalFieldsMap.keys.toSeq
  /** all [[Tweet]] field IDs which should be rejected when set as additional
   * fields on via PostTweetRequest.additionalFields or RetweetRequest.additionalFields */
  val RejectedFieldIds: Seq[FieldId] = Seq(
    // Should be provided via PostTweetRequest.conversationControl field. go/convocontrolsbackend
    Tweet.ConversationControlField.id,
    // This field should only be set based on whether the client sets the right community
    // tweet annotation.
    Tweet.CommunitiesField.id,
    // This field should not be set by clients and should opt for
    // [[PostTweetRequest.ExclusiveTweetControlOptions]].
    // The exclusiveTweetControl field requires the userId to be set
    // and we shouldn't trust the client to provide the right one.
    Tweet.ExclusiveTweetControlField.id,
    // This field should not be set by clients and should opt for
    // [[PostTweetRequest.TrustedFriendsControlOptions]].
    // The trustedFriendsControl field requires the trustedFriendsListId to be
    // set and we shouldn't trust the client to provide the right one.
    Tweet.TrustedFriendsControlField.id,
    // This field should not be set by clients and should opt for
    // [[PostTweetRequest.CollabControlOptions]].
    // The collabControl field requires a list of Collaborators to be
    // set and we shouldn't trust the client to provide the right one.
    Tweet.CollabControlField.id
  )
  def isAdditionalFieldId(fieldId: FieldId): Boolean =
    fieldId >= StartAdditionalId
  /**
   * Provides a list of all additional field IDs on the tweet, which include all
   * the compiled additional fields and all the provided passthrough fields.  This includes
   * compiled additional fields where the value is None.
   */
  def allAdditionalFieldIds(tweet: Tweet): Seq[FieldId] =
    CompiledAdditionalFieldIds ++ tweet._passthroughFields.keys
  /**
   * Provides a list of all field IDs that have a value on the tweet which are not known compiled
   * additional fields (excludes [[Tweet.id]]).
   */
  def unsettableAdditionalFieldIds(tweet: Tweet): Seq[FieldId] =
    CompiledFieldIds
      .filter { id =>
        !isAdditionalFieldId(id) && id != Tweet.IdField.id && tweet.getFieldBlob(id).isDefined
      } ++
      tweet._passthroughFields.keys
  /**
   * Provides a list of all field IDs that have a value on the tweet which are explicitly disallowed
   * from being set via PostTweetRequest.additionalFields and RetweetRequest.additionalFields
   */
  def rejectedAdditionalFieldIds(tweet: Tweet): Seq[FieldId] =
    RejectedFieldIds
      .filter { id => tweet.getFieldBlob(id).isDefined }
  def unsettableAdditionalFieldIdsErrorMessage(unsettableFieldIds: Seq[FieldId]): String =
    s"request may not contain fields: [${unsettableFieldIds.sorted.mkString(", ")}]"
  /**
   * Provides a list of all additional field IDs that have a value on the tweet,
   * compiled and passthrough (excludes Tweet.id).
   */
  def nonEmptyAdditionalFieldIds(tweet: Tweet): Seq[FieldId] =
    CompiledAdditionalFieldMetaDatas.collect {
      case f if f.getValue(tweet) != None => f.id
    } ++ tweet._passthroughFields.keys
  def additionalFields(tweet: Tweet): Seq[TFieldBlob] =
    (tweet.getFieldBlobs(CompiledAdditionalFieldIds) ++ tweet._passthroughFields).values.toSeq
  /**
   * Merge base tweet with additional fields.
   * Non-additional fields in the additional tweet are ignored.
   * @param base: a tweet that contains basic fields
   * @param additional: a tweet object that carries additional fields
   */
  def setAdditionalFields(base: Tweet, additional: Tweet): Tweet =
    setAdditionalFields(base, additionalFields(additional))
  def setAdditionalFields(base: Tweet, additional: Option[Tweet]): Tweet =
    additional.map(setAdditionalFields(base, _)).getOrElse(base)
  def setAdditionalFields(base: Tweet, additional: Traversable[TFieldBlob]): Tweet =
    additional.foldLeft(base) { case (t, f) => t.setField(f) }
  /**
   * Unsets the specified fields on the given tweet.
   */
  def unsetFields(tweet: Tweet, fieldIds: Iterable[FieldId]): Tweet = {
    tweet.unsetFields(fieldIds.toSet)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD
@ -0,0 +1,15 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/org/apache/thrift:libthrift",
        "mediaservices/commons/src/main/thrift:thrift-scala",
        "scrooge/scrooge-core",
        "src/thrift/com/twitter/escherbird:media-annotation-structs-scala",
        "src/thrift/com/twitter/spam/rtf:safety-label-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD
@ -0,0 +1,15 @@
 scala_library(
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "finagle/finagle-memcached/src/main/scala",
        "scrooge/scrooge-serializer",
        "stitch/stitch-core",
        "util/util-core",
        "util/util-logging",
        # CachedValue struct
        "tweetypie/servo/repo/src/main/thrift:thrift-scala",
        "util/util-slf4j-api/src/main/scala/com/twitter/util/logging",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala
@ -0,0 +1,241 @@
 package com.twitter.tweetypie.caching
 import com.twitter.finagle.service.StatsFilter
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.finagle.stats.ExceptionStatsHandler
 import com.twitter.finagle.stats.Counter
 import com.twitter.util.Future
 import com.twitter.util.logging.Logger
 import com.twitter.finagle.memcached
 import scala.util.control.NonFatal
 /**
 * Wrapper around a memcached client that performs serialization and
 * deserialization, tracks stats, provides tracing, and provides
 * per-key fresh/stale/failure/miss results.
 *
 * The operations that write values to cache will only write values
 * that the ValueSerializer says are cacheable. The idea here is that
 * the deserialize and serialize functions must be coherent, and no
 * matter how you choose to write these values back to cache, the
 * serializer will have the appropriate knowledge about whether the
 * values are cacheable.
 *
 * For most cases, you will want to use [[StitchCaching]] rather than
 * calling this wrapper directly.
 *
 * @param keySerializer How to convert a K value to a memcached key.
 *
 * @param valueSerializer How to serialize and deserialize V values,
 *   as well as which values are cacheable, and how long to store the
 *   values in cache.
 */
 class CacheOperations[K, V](
  keySerializer: K => String,
  valueSerializer: ValueSerializer[V],
  memcachedClient: memcached.Client,
  statsReceiver: StatsReceiver,
  logger: Logger,
  exceptionStatsHandler: ExceptionStatsHandler = StatsFilter.DefaultExceptions) {
  // The memcached operations that are performed via this
  // [[CacheOperations]] instance will be tracked under this stats
  // receiver.
  //
  // We count all memcached failures together under this scope,
  // because memcached operations should not fail unless there are
  // communication problems, so differentiating the method that was
  // being called will not give us any useful information.
  private[this] val memcachedStats: StatsReceiver = statsReceiver.scope("memcached")
  // Incremented for every attempt to `get` a key from cache.
  private[this] val memcachedGetCounter: Counter = memcachedStats.counter("get")
  // One of these two counters is incremented for every successful
  // response returned from a `get` call to memcached.
  private[this] val memcachedNotFoundCounter: Counter = memcachedStats.counter("not_found")
  private[this] val memcachedFoundCounter: Counter = memcachedStats.counter("found")
  // Records the state of the cache load after serialization. The
  // policy may transform a value that was successfully loaded from
  // cache into any result type, which is why we explicitly track
  // "found" and "not_found" above. If `stale` + `fresh` is not equal
  // to `found`, then it means that the policy has translated a found
  // value into a miss or failure. The policy may do this in order to
  // cause the caching filter to treat the value that was found in
  // cache in the way it would have treated a miss or failure from
  // cache.
  private[this] val resultStats: StatsReceiver = statsReceiver.scope("result")
  private[this] val resultFreshCounter: Counter = resultStats.counter("fresh")
  private[this] val resultStaleCounter: Counter = resultStats.counter("stale")
  private[this] val resultMissCounter: Counter = resultStats.counter("miss")
  private[this] val resultFailureCounter: Counter = resultStats.counter("failure")
  // Used for recording exceptions that occurred during
  // deserialization. This will never be incremented if the
  // deserializer returns a result, even if the result is a
  // [[CacheResult.Failure]]. See the comment where this stat is
  // incremented for more details.
  private[this] val deserializeFailureStats: StatsReceiver = statsReceiver.scope("deserialize")
  private[this] val notSerializedCounter: Counter = statsReceiver.counter("not_serialized")
  /**
   * Load a batch of values from cache. Mostly this deals with
   * converting the [[memcached.GetResult]] to a
   * [[Seq[CachedResult[V]]]]. The result is in the same order as the
   * keys, and there will always be an entry for each key. This method
   * should never return a [[Future.exception]].
   */
  def get(keys: Seq[K]): Future[Seq[CacheResult[V]]] = {
    memcachedGetCounter.incr(keys.size)
    val cacheKeys: Seq[String] = keys.map(keySerializer)
    if (logger.isTraceEnabled) {
      logger.trace {
        val lines: Seq[String] = keys.zip(cacheKeys).map { case (k, c) => s"\n  $k ($c)" }
        "Starting load for keys:" + lines.mkString
      }
    }
    memcachedClient
      .getResult(cacheKeys)
      .map { getResult =>
        memcachedNotFoundCounter.incr(getResult.misses.size)
        val results: Seq[CacheResult[V]] =
          cacheKeys.map { cacheKey =>
            val result: CacheResult[V] =
              getResult.hits.get(cacheKey) match {
                case Some(memcachedValue) =>
                  memcachedFoundCounter.incr()
                  try {
                    valueSerializer.deserialize(memcachedValue.value)
                  } catch {
                    case NonFatal(e) =>
                      // If the serializer throws an exception, then
                      // the serialized value was malformed. In that
                      // case, we record the failure so that it can be
                      // detected and fixed, but treat it as a cache
                      // miss. The reason that we treat it as a miss
                      // rather than a failure is that a miss will
                      // cause a write back to cache, and we want to
                      // write a valid result back to cache to replace
                      // the bad entry that we just loaded.
                      //
                      // A serializer is free to return Miss itself to
                      // obtain this behavior if it is expected or
                      // desired, to avoid the logging and stats (and
                      // the minor overhead of catching an exception).
                      //
                      // The exceptions are tracked separately from
                      // other exceptions so that it is easy to see
                      // whether the deserializer itself ever throws an
                      // exception.
                      exceptionStatsHandler.record(deserializeFailureStats, e)
                      logger.warn(s"Failed deserializing value for cache key $cacheKey", e)
                      CacheResult.Miss
                  }
                case None if getResult.misses.contains(cacheKey) =>
                  CacheResult.Miss
                case None =>
                  val exception =
                    getResult.failures.get(cacheKey) match {
                      case None =>
                        // To get here, this was not a hit or a miss,
                        // so we expect the key to be present in
                        // failures. If it is not, then either the
                        // contract of getResult was violated, or this
                        // method is somehow attempting to access a
                        // result for a key that was not
                        // loaded. Either of these indicates a bug, so
                        // we log a high priority log message.
                        logger.error(
                          s"Key $cacheKey not found in hits, misses or failures. " +
                            "This indicates a bug in the memcached library or " +
                            "CacheOperations.load"
                        )
                        // We return this as a failure because that
                        // will cause the repo to be consulted and the
                        // value *not* to be written back to cache,
                        // which is probably the safest thing to do
                        // (if we don't know what's going on, default
                        // to an uncached repo).
                        new IllegalStateException
                      case Some(e) =>
                        e
                    }
                  exceptionStatsHandler.record(memcachedStats, exception)
                  CacheResult.Failure(exception)
              }
            // Count each kind of CacheResult, to make it possible to
            // see how effective the caching is.
            result match {
              case CacheResult.Fresh(_) => resultFreshCounter.incr()
              case CacheResult.Stale(_) => resultStaleCounter.incr()
              case CacheResult.Miss => resultMissCounter.incr()
              case CacheResult.Failure(_) => resultFailureCounter.incr()
            }
            result
          }
        if (logger.isTraceEnabled) {
          logger.trace {
            val lines: Seq[String] =
              (keys, cacheKeys, results).zipped.map {
                case (key, cacheKey, result) => s"\n  $key ($cacheKey) -> $result"
              }
            "Cache results:" + lines.mkString
          }
        }
        results
      }
      .handle {
        case e =>
          // If there is a failure from the memcached client, fan it
          // out to each cache key, so that the caller does not need
          // to handle failure of the batch differently than failure
          // of individual keys. This should be rare anyway, since the
          // memcached client already does this for common Finagle
          // exceptions
          resultFailureCounter.incr(keys.size)
          val theFailure: CacheResult[V] = CacheResult.Failure(e)
          keys.map { _ =>
            // Record this as many times as we would if it were in the GetResult
            exceptionStatsHandler.record(memcachedStats, e)
            theFailure
          }
      }
  }
  // Incremented for every attempt to `set` a key in value.
  private[this] val memcachedSetCounter: Counter = memcachedStats.counter("set")
  /**
   * Write an entry back to cache, using `set`. If the serializer does
   * not serialize the value, then this method will immediately return
   * with success.
   */
  def set(key: K, value: V): Future[Unit] =
    valueSerializer.serialize(value) match {
      case Some((expiry, serialized)) =>
        if (logger.isTraceEnabled) {
          logger.trace(s"Writing back to cache $key -> $value (expiry = $expiry)")
        }
        memcachedSetCounter.incr()
        memcachedClient
          .set(key = keySerializer(key), flags = 0, expiry = expiry, value = serialized)
          .onFailure(exceptionStatsHandler.record(memcachedStats, _))
      case None =>
        if (logger.isTraceEnabled) {
          logger.trace(s"Not writing back $key -> $value")
        }
        notSerializedCounter.incr()
        Future.Done
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala
@ -0,0 +1,45 @@
 package com.twitter.tweetypie.caching
 /**
 * Encodes the possible states of a value loaded from memcached.
 *
 * @see [[ValueSerializer]] and [[CacheOperations]]
 */
 sealed trait CacheResult[+V]
 object CacheResult {
  /**
   * Signals that the value could not be successfully loaded from
   * cache. `Failure` values should not be written back to cache.
   *
   * This value may result from an error talking to the memcached
   * instance or it may be returned from the Serializer when the value
   * should not be reused, but should also not be overwritten.
   */
  final case class Failure(e: Throwable) extends CacheResult[Nothing]
  /**
   * Signals that the cache load attempt was successful, but there was
   * not a usable value.
   *
   * When processing a `Miss`, the value should be written back to
   * cache if it loads successfully.
   */
  case object Miss extends CacheResult[Nothing]
  /**
   * Signals that the value was found in cache.
   *
   * It is not necessary to load the value from the original source.
   */
  case class Fresh[V](value: V) extends CacheResult[V]
  /**
   * Signals that the value was found in cache.
   *
   * This value should be used, but it should be refreshed
   * out-of-band.
   */
  case class Stale[V](value: V) extends CacheResult[V]
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala
@ -0,0 +1,34 @@
 package com.twitter.tweetypie.caching
 import com.twitter.util.Duration
 import com.twitter.util.Time
 /**
 * Helpers for creating common expiry functions.
 *
 * An expiry function maps from the value to a time in the future when
 * the value should expire from cache. These are useful in the
 * implementation of a [[ValueSerializer]].
 */
 object Expiry {
  /**
   * Return a time that indicates to memcached to never expire this
   * value.
   *
   * This function takes [[Any]] so that it can be used at any value
   * type, since it doesn't examine the value at all.
   */
  val Never: Any => Time =
    _ => Time.Top
  /**
   * Return function that indicates to memcached that the value should
   * not be used after the `ttl` has elapsed.
   *
   * This function takes [[Any]] so that it can be used at any value
   * type, since it doesn't examine the value at all.
   */
  def byAge(ttl: Duration): Any => Time =
    _ => Time.now + ttl
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala
@ -0,0 +1,140 @@
 package com.twitter.tweetypie.caching
 import com.twitter.io.Buf
 import com.twitter.scrooge.CompactThriftSerializer
 import com.twitter.scrooge.ThriftStruct
 import com.twitter.scrooge.ThriftStructCodec
 import com.twitter.servo.cache.thriftscala.CachedValue
 import com.twitter.servo.cache.thriftscala.CachedValueStatus
 import com.twitter.stitch.NotFound
 import com.twitter.util.Return
 import com.twitter.util.Throw
 import com.twitter.util.Time
 import com.twitter.util.Try
 import java.nio.ByteBuffer
 object ServoCachedValueSerializer {
  /**
   * Thrown when the fields of the servo CachedValue struct do not
   * satisfy the invariants expected by this serialization code.
   */
  case class UnexpectedCachedValueState(cachedValue: CachedValue) extends Exception {
    def message: String = s"Unexpected state for CachedValue. Value was: $cachedValue"
  }
  val CachedValueThriftSerializer: CompactThriftSerializer[CachedValue] = CompactThriftSerializer(
    CachedValue)
 }
 /**
 * A [[ValueSerializer]] that is compatible with the use of
 * Servo's [[CachedValue]] struct by tweetypie:
 *
 * - The only [[CachedValueStatus]] values that are cacheable are
 *   [[CachedValueStatus.Found]] and [[CachedValueStatus.NotFound]].
 *
 * - We only track the `cachedAtMsec` field, because tweetypie's cache
 *   interaction does not use the other fields, and the values that
 *   are cached this way are never updated, so storing readThroughAt
 *   or writtenThroughAt would not add any information.
 *
 * - When values are present, they are serialized using
 *   [[org.apache.thrift.protocol.TCompactProtocol]].
 *
 * - The CachedValue struct itself is also serialized using TCompactProtocol.
 *
 * The serializer operates on [[Try]] values and will cache [[Return]]
 * and `Throw(NotFound)` values.
 */
 case class ServoCachedValueSerializer[V <: ThriftStruct](
  codec: ThriftStructCodec[V],
  expiry: Try[V] => Time,
  softTtl: SoftTtl[Try[V]])
    extends ValueSerializer[Try[V]] {
  import ServoCachedValueSerializer.UnexpectedCachedValueState
  import ServoCachedValueSerializer.CachedValueThriftSerializer
  private[this] val ValueThriftSerializer = CompactThriftSerializer(codec)
  /**
   * Return an expiry based on the value and a
   * TCompactProtocol-encoded servo CachedValue struct with the
   * following fields defined:
   *
   * - `value`: [[None]]
   *   for {{{Throw(NotFound)}}, {{{Some(encodedStruct)}}} for
   *   [[Return]], where {{{encodedStruct}}} is a
   *   TCompactProtocol-encoding of the value inside of the Return.
   *
   * - `status`: [[CachedValueStatus.Found]] if the value is Return,
   *   and [[CachedValueStatus.NotFound]] if it is Throw(NotFound)
   *
   * - `cachedAtMsec`: The current time, accoring to [[Time.now]]
   *
   * No other fields will be defined.
   *
   * @throws IllegalArgumentException if called with a value that
   *   should not be cached.
   */
  override def serialize(value: Try[V]): Option[(Time, Buf)] = {
    def serializeCachedValue(payload: Option[ByteBuffer]) = {
      val cachedValue = CachedValue(
        value = payload,
        status = if (payload.isDefined) CachedValueStatus.Found else CachedValueStatus.NotFound,
        cachedAtMsec = Time.now.inMilliseconds)
      val serialized = Buf.ByteArray.Owned(CachedValueThriftSerializer.toBytes(cachedValue))
      (expiry(value), serialized)
    }
    value match {
      case Throw(NotFound) =>
        Some(serializeCachedValue(None))
      case Return(struct) =>
        val payload = Some(ByteBuffer.wrap(ValueThriftSerializer.toBytes(struct)))
        Some(serializeCachedValue(payload))
      case _ =>
        None
    }
  }
  /**
   * Deserializes values serialized by [[serializeValue]]. The
   * value will be [[CacheResult.Fresh]] or [[CacheResult.Stale]]
   * depending on the result of {{{softTtl.isFresh}}}.
   *
   * @throws UnexpectedCachedValueState if the state of the
   *   [[CachedValue]] could not be produced by [[serialize]]
   */
  override def deserialize(buf: Buf): CacheResult[Try[V]] = {
    val cachedValue = CachedValueThriftSerializer.fromBytes(Buf.ByteArray.Owned.extract(buf))
    val hasValue = cachedValue.value.isDefined
    val isValid =
      (hasValue && cachedValue.status == CachedValueStatus.Found) ||
        (!hasValue && cachedValue.status == CachedValueStatus.NotFound)
    if (!isValid) {
      // Exceptions thrown by deserialization are recorded and treated
      // as a cache miss by CacheOperations, so throwing this
      // exception will cause the value in cache to be
      // overwritten. There will be stats recorded whenever this
      // happens.
      throw UnexpectedCachedValueState(cachedValue)
    }
    val value =
      cachedValue.value match {
        case Some(valueBuffer) =>
          val valueBytes = new Array[Byte](valueBuffer.remaining)
          valueBuffer.duplicate.get(valueBytes)
          Return(ValueThriftSerializer.fromBytes(valueBytes))
        case None =>
          Throw(NotFound)
      }
    softTtl.toCacheResult(value, Time.fromMilliseconds(cachedValue.cachedAtMsec))
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala
@ -0,0 +1,120 @@
 package com.twitter.tweetypie.caching
 import com.twitter.util.Duration
 import com.twitter.util.Time
 import scala.util.Random
 import com.twitter.logging.Logger
 /**
 * Used to determine whether values successfully retrieved from cache
 * are [[CacheResult.Fresh]] or [[CacheResult.Stale]]. This is useful
 * in the implementation of a [[ValueSerializer]].
 */
 trait SoftTtl[-V] {
  /**
   * Determines whether a cached value was fresh.
   *
   * @param cachedAt  the time at which the value was cached.
   */
  def isFresh(value: V, cachedAt: Time): Boolean
  /**
   * Wraps the value in Fresh or Stale depending on the value of `isFresh`.
   *
   * (The type variable U exists because it is not allowed to return
   * values of a contravariant type, so we must define a variable that
   * is a specific subclass of V. This is worth it because it allows
   * us to create polymorphic policies without having to specify the
   * type. Another solution would be to make the type invariant, but
   * then we would have to specify the type whenever we create an
   * instance.)
   */
  def toCacheResult[U <: V](value: U, cachedAt: Time): CacheResult[U] =
    if (isFresh(value, cachedAt)) CacheResult.Fresh(value) else CacheResult.Stale(value)
 }
 object SoftTtl {
  /**
   * Regardless of the inputs, the value will always be considered
   * fresh.
   */
  object NeverRefresh extends SoftTtl[Any] {
    override def isFresh(_unusedValue: Any, _unusedCachedAt: Time): Boolean = true
  }
  /**
   * Trigger refresh based on the length of time that a value has been
   * stored in cache, ignoring the value.
   *
   * @param softTtl Items that were cached longer ago than this value
   *   will be refreshed when they are accessed.
   *
   * @param jitter Add nondeterminism to the soft TTL to prevent a
   *   thundering herd of requests refreshing the value at the same
   *   time. The time at which the value is considered stale will be
   *   uniformly spread out over a range of +/- (jitter/2). It is
   *   valid to set the jitter to zero, which will turn off jittering.
   *
   * @param logger If non-null, use this logger rather than one based
   *   on the class name. This logger is only used for trace-level
   *   logging.
   */
  case class ByAge[V](
    softTtl: Duration,
    jitter: Duration,
    specificLogger: Logger = null,
    rng: Random = Random)
      extends SoftTtl[Any] {
    private[this] val logger: Logger =
      if (specificLogger == null) Logger(getClass) else specificLogger
    private[this] val maxJitterMs: Long = jitter.inMilliseconds
    // this requirement is due to using Random.nextInt to choose the
    // jitter, but it allows jitter of greater than 24 days
    require(maxJitterMs <= (Int.MaxValue / 2))
    // Negative jitter probably indicates misuse of the API
    require(maxJitterMs >= 0)
    // we want period +/- jitter, but the random generator
    // generates non-negative numbers, so we generate [0, 2 *
    // maxJitter) and subtract maxJitter to obtain [-maxJitter,
    // maxJitter)
    private[this] val maxJitterRangeMs: Int = (maxJitterMs * 2).toInt
    // We perform all calculations in milliseconds, so convert the
    // period to milliseconds out here.
    private[this] val softTtlMs: Long = softTtl.inMilliseconds
    // If the value is below this age, it will always be fresh,
    // regardless of jitter.
    private[this] val alwaysFreshAgeMs: Long = softTtlMs - maxJitterMs
    // If the value is above this age, it will always be stale,
    // regardless of jitter.
    private[this] val alwaysStaleAgeMs: Long = softTtlMs + maxJitterMs
    override def isFresh(value: Any, cachedAt: Time): Boolean = {
      val ageMs: Long = (Time.now - cachedAt).inMilliseconds
      val fresh =
        if (ageMs <= alwaysFreshAgeMs) {
          true
        } else if (ageMs > alwaysStaleAgeMs) {
          false
        } else {
          val jitterMs: Long = rng.nextInt(maxJitterRangeMs) - maxJitterMs
          ageMs <= softTtlMs + jitterMs
        }
      logger.ifTrace(
        s"Checked soft ttl: fresh = $fresh, " +
          s"soft_ttl_ms = $softTtlMs, age_ms = $ageMs, value = $value")
      fresh
    }
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala
@ -0,0 +1,65 @@
 package com.twitter.tweetypie.caching
 import scala.collection.mutable
 import com.twitter.util.Future
 import com.twitter.stitch.Stitch
 import com.twitter.stitch.Runner
 import com.twitter.stitch.FutureRunner
 import com.twitter.stitch.Group
 /**
 * Workaround for a infelicity in the implementation of [[Stitch.async]].
 *
 * This has the same semantics to [[Stitch.async]], with the exception
 * that interrupts to the main computation will not interrupt the
 * async call.
 *
 * The problem that this implementation solves is that we do not want
 * async calls grouped together with synchronous calls. See the
 * mailing list thread [1] for discussion. This may eventually be
 * fixed in Stitch.
 */
 private[caching] object StitchAsync {
  // Contains a deferred Stitch that we want to run asynchronously
  private[this] class AsyncCall(deferred: => Stitch[_]) {
    def call(): Stitch[_] = deferred
  }
  private object AsyncGroup extends Group[AsyncCall, Unit] {
    override def runner(): Runner[AsyncCall, Unit] =
      new FutureRunner[AsyncCall, Unit] {
        // All of the deferred calls of any type. When they are
        // executed in `run`, the normal Stitch batching and deduping
        // will occur.
        private[this] val calls = new mutable.ArrayBuffer[AsyncCall]
        def add(call: AsyncCall): Stitch[Unit] = {
          // Just remember the deferred call.
          calls.append(call)
          // Since we don't wait for the completion of the effect,
          // just return a constant value.
          Stitch.Unit
        }
        def run(): Future[_] = {
          // The future returned from this innter invocation of
          // Stitch.run is not linked to the returned future, so these
          // effects are not linked to the outer Run in which this
          // method was invoked.
          Stitch.run {
            Stitch.traverse(calls) { asyncCall: AsyncCall =>
              asyncCall
                .call()
                .liftToTry // So that an exception will not interrupt the other calls
            }
          }
          Future.Unit
        }
      }
  }
  def apply(call: => Stitch[_]): Stitch[Unit] =
    // Group together all of the async calls
    Stitch.call(new AsyncCall(call), AsyncGroup)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala
@ -0,0 +1,62 @@
 package com.twitter.tweetypie.caching
 import com.twitter.stitch.MapGroup
 import com.twitter.stitch.Group
 import com.twitter.stitch.Stitch
 import com.twitter.util.Future
 import com.twitter.util.Return
 import com.twitter.util.Try
 /**
 * Wrapper around [[CacheOperations]] providing a [[Stitch]] API.
 */
 case class StitchCacheOperations[K, V](operations: CacheOperations[K, V]) {
  import StitchCacheOperations.SetCall
  private[this] val getGroup: Group[K, CacheResult[V]] =
    MapGroup[K, CacheResult[V]] { keys: Seq[K] =>
      operations
        .get(keys)
        .map(values => keys.zip(values).toMap.mapValues(Return(_)))
    }
  def get(key: K): Stitch[CacheResult[V]] =
    Stitch.call(key, getGroup)
  private[this] val setGroup: Group[SetCall[K, V], Unit] =
    new MapGroup[SetCall[K, V], Unit] {
      override def run(calls: Seq[SetCall[K, V]]): Future[SetCall[K, V] => Try[Unit]] =
        Future
          .collectToTry(calls.map(call => operations.set(call.key, call.value)))
          .map(tries => calls.zip(tries).toMap)
    }
  /**
   * Performs a [[CacheOperations.set]].
   */
  def set(key: K, value: V): Stitch[Unit] =
    // This is implemented as a Stitch.call instead of a Stitch.future
    // in order to handle the case where a batch has a duplicate
    // key. Each copy of the duplicate key will trigger a write back
    // to cache, so we dedupe the writes in order to avoid the
    // extraneous RPC call.
    Stitch.call(new StitchCacheOperations.SetCall(key, value), setGroup)
 }
 object StitchCacheOperations {
  /**
   * Used as the "call" for [[SetGroup]]. This is essentially a tuple
   * where equality is defined only by the key.
   */
  private class SetCall[K, V](val key: K, val value: V) {
    override def equals(other: Any): Boolean =
      other match {
        case setCall: SetCall[_, _] => key == setCall.key
        case _ => false
      }
    override def hashCode: Int = key.hashCode
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala
@ -0,0 +1,36 @@
 package com.twitter.tweetypie.caching
 import com.twitter.stitch.Stitch
 /**
 * Apply caching to a [[Stitch]] function.
 *
 * @see CacheResult for more information about the semantics
 *   implemented here.
 */
 class StitchCaching[K, V](operations: CacheOperations[K, V], repo: K => Stitch[V])
    extends (K => Stitch[V]) {
  private[this] val stitchOps = new StitchCacheOperations(operations)
  override def apply(key: K): Stitch[V] =
    stitchOps.get(key).flatMap {
      case CacheResult.Fresh(value) =>
        Stitch.value(value)
      case CacheResult.Stale(staleValue) =>
        StitchAsync(repo(key).flatMap(refreshed => stitchOps.set(key, refreshed)))
          .map(_ => staleValue)
      case CacheResult.Miss =>
        repo(key)
          .applyEffect(value => StitchAsync(stitchOps.set(key, value)))
      case CacheResult.Failure(_) =>
        // In the case of failure, we don't attempt to write back to
        // cache, because cache failure usually means communication
        // failure, and sending more requests to the cache that holds
        // the value for this key could make the situation worse.
        repo(key)
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala
@ -0,0 +1,47 @@
 package com.twitter.tweetypie.caching
 import com.twitter.io.Buf
 import com.twitter.util.Time
 /**
 * How to store values of type V in cache. This includes whether a
 * given value is cacheable, how to serialize it, when it should
 * expire from cache, and how to interpret byte patterns from cache.
 */
 trait ValueSerializer[V] {
  /**
   * Prepare the value for storage in cache. When a [[Some]] is
   * returned, the [[Buf]] should be a valid input to [[deserialize]]
   * and the [[Time]] will be used as the expiry in the memcached
   * command.  When [[None]] is returned, it indicates that the value
   * cannot or should not be written back to cache.
   *
   * The most common use case for returning None is caching Try
   * values, where certain exceptional values encode a cacheable state
   * of a value. In particular, Throw(NotFound) is commonly used to
   * encode a missing value, and we usually want to cache those
   * negative lookups, but we don't want to cache e.g. a timeout
   * exception.
   *
   * @return a pair of expiry time for this cache entry and the bytes
   *   to store in cache. If you do not want this value to explicitly
   *   expire, use Time.Top as the expiry.
   */
  def serialize(value: V): Option[(Time, Buf)]
  /**
   * Deserialize a value found in cache. This function converts the
   * bytes found in memcache to a [[CacheResult]]. In general, you
   * probably want to return [[CacheResult.Fresh]] or
   * [[CacheResult.Stale]], but you are free to return any of the
   * range of [[CacheResult]]s, depending on the behavior that you
   * want.
   *
   * This is a total function because in the common use case, the
   * bytes stored in cache will be appropriate for the
   * serializer. This method is free to throw any exception if the
   * bytes are not valid.
   */
  def deserialize(serializedValue: Buf): CacheResult[V]
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD
@ -0,0 +1,15 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
        "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/transport",
        "finagle/finagle-thrift/src/main/scala",
        "tweetypie/servo/util/src/main/scala:exception",
        "strato/src/main/scala/com/twitter/strato/access",
        "strato/src/main/scala/com/twitter/strato/data",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala
@ -0,0 +1,185 @@
 package com.twitter.tweetypie.client_id
 import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier
 import com.twitter.finagle.mtls.authentication.ServiceIdentifier
 import com.twitter.finagle.mtls.transport.S2STransport
 import com.twitter.finagle.thrift.ClientId
 import com.twitter.servo.util.Gate
 import com.twitter.strato.access.Access
 import com.twitter.strato.access.Access.ForwardedServiceIdentifier
 object ClientIdHelper {
  val UnknownClientId = "unknown"
  def default: ClientIdHelper = new ClientIdHelper(UseTransportServiceIdentifier)
  /**
   * Trims off the last .element, which is usually .prod or .staging
   */
  def getClientIdRoot(clientId: String): String =
    clientId.lastIndexOf('.') match {
      case -1 => clientId
      case idx => clientId.substring(0, idx)
    }
  /**
   * Returns the last .element without the '.'
   */
  def getClientIdEnv(clientId: String): String =
    clientId.lastIndexOf('.') match {
      case -1 => clientId
      case idx => clientId.substring(idx + 1)
    }
  private[client_id] def asClientId(s: ServiceIdentifier): String = s"${s.service}.${s.environment}"
 }
 class ClientIdHelper(serviceIdentifierStrategy: ServiceIdentifierStrategy) {
  private[client_id] val ProcessPathPrefix = "/p/"
  /**
   * The effective client id is used for request authorization and metrics
   * attribution. For calls to Tweetypie's thrift API, the thrift ClientId
   * is used and is expected in the form of "service-name.env". Federated
   * Strato clients don't support configured ClientIds and instead provide
   * a "process path" containing instance-specific information. So for
   * calls to the federated API, we compute an effective client id from
   * the ServiceIdentifier, if present, in Strato's Access principles. The
   * implementation avoids computing this identifier unless necessary,
   * since this method is invoked on every request.
   */
  def effectiveClientId: Option[String] = {
    val clientId: Option[String] = ClientId.current.map(_.name)
    clientId
    // Exclude process paths because they are instance-specific and aren't
    // supported by tweetypie for authorization or metrics purposes.
      .filterNot(_.startsWith(ProcessPathPrefix))
      // Try computing a value from the ServiceId if the thrift
      // ClientId is undefined or unsupported.
      .orElse(serviceIdentifierStrategy.serviceIdentifier.map(ClientIdHelper.asClientId))
      // Ultimately fall back to the ClientId value, even when given an
      // unsupported format, so that error text and debug logs include
      // the value passed by the caller.
      .orElse(clientId)
  }
  def effectiveClientIdRoot: Option[String] = effectiveClientId.map(ClientIdHelper.getClientIdRoot)
  def effectiveServiceIdentifier: Option[ServiceIdentifier] =
    serviceIdentifierStrategy.serviceIdentifier
 }
 /** Logic how to find a [[ServiceIdentifier]] for the purpose of crafting a client ID. */
 trait ServiceIdentifierStrategy {
  def serviceIdentifier: Option[ServiceIdentifier]
  /**
   * Returns the only element of given [[Set]] or [[None]].
   *
   * This utility is used defensively against a set of principals collected
   * from [[Access.getPrincipals]]. While the contract is that there should be at most one
   * instance of each principal kind present in that set, in practice that has not been the case
   * always. The safest strategy to in that case is to abandon a set completely if more than
   * one principals are competing.
   */
  final protected def onlyElement[T](set: Set[T]): Option[T] =
    if (set.size <= 1) {
      set.headOption
    } else {
      None
    }
 }
 /**
 * Picks [[ServiceIdentifier]] from Finagle SSL Transport, if one exists.
 *
 * This works for both Thrift API calls as well as StratoFed API calls. Strato's
 * [[Access#getPrincipals]] collection, which would typically be consulted by StratoFed
 * column logic, contains the same [[ServiceIdentifier]] derived from the Finagle SSL
 * transport, so there's no need to have separate strategies for Thrift vs StratoFed
 * calls.
 *
 * This is the default behavior of using [[ServiceIdentifier]] for computing client ID.
 */
 private[client_id] class UseTransportServiceIdentifier(
  // overridable for testing
  getPeerServiceIdentifier: => ServiceIdentifier,
 ) extends ServiceIdentifierStrategy {
  override def serviceIdentifier: Option[ServiceIdentifier] =
    getPeerServiceIdentifier match {
      case EmptyServiceIdentifier => None
      case si => Some(si)
    }
 }
 object UseTransportServiceIdentifier
    extends UseTransportServiceIdentifier(S2STransport.peerServiceIdentifier)
 /**
 * Picks [[ForwardedServiceIdentifier]] from Strato principals for client ID
 * if [[ServiceIdentifier]] points at call coming from Strato.
 * If not present, falls back to [[UseTransportServiceIdentifier]] behavior.
 *
 * Tweetypie utilizes the strategy to pick [[ServiceIdentifier]] for the purpose
 * of generating a client ID when the client ID is absent or unknown.
 * [[PreferForwardedServiceIdentifierForStrato]] looks for the [[ForwardedServiceIdentifier]]
 * values set by stratoserver request.
 * The reason is, stratoserver is effectively a conduit, forwarding the [[ServiceIdentifier]]
 * of the _actual client_ that is calling stratoserver.
 * Any direct callers not going through stratoserver will default to [[ServiceIdentfier]].
 */
 private[client_id] class PreferForwardedServiceIdentifierForStrato(
  // overridable for testing
  getPeerServiceIdentifier: => ServiceIdentifier,
 ) extends ServiceIdentifierStrategy {
  val useTransportServiceIdentifier =
    new UseTransportServiceIdentifier(getPeerServiceIdentifier)
  override def serviceIdentifier: Option[ServiceIdentifier] =
    useTransportServiceIdentifier.serviceIdentifier match {
      case Some(serviceIdentifier) if isStrato(serviceIdentifier) =>
        onlyElement(
          Access.getPrincipals
            .collect {
              case forwarded: ForwardedServiceIdentifier =>
                forwarded.serviceIdentifier.serviceIdentifier
            }
        ).orElse(useTransportServiceIdentifier.serviceIdentifier)
      case other => other
    }
  /**
   * Strato uses various service names like "stratoserver" and "stratoserver-patient".
   * They all do start with "stratoserver" though, so at the point of implementing,
   * the safest bet to recognize strato is to look for this prefix.
   *
   * This also works for staged strato instances (which it should), despite allowing
   * for technically any caller to force this strategy, by creating service certificate
   * with this service name.
   */
  private def isStrato(serviceIdentifier: ServiceIdentifier): Boolean =
    serviceIdentifier.service.startsWith("stratoserver")
 }
 object PreferForwardedServiceIdentifierForStrato
    extends PreferForwardedServiceIdentifierForStrato(S2STransport.peerServiceIdentifier)
 /**
 * [[ServiceIdentifierStrategy]] which dispatches between two delegates based on the value
 * of a unitary decider every time [[serviceIdentifier]] is called.
 */
 class ConditionalServiceIdentifierStrategy(
  private val condition: Gate[Unit],
  private val ifTrue: ServiceIdentifierStrategy,
  private val ifFalse: ServiceIdentifierStrategy)
    extends ServiceIdentifierStrategy {
  override def serviceIdentifier: Option[ServiceIdentifier] =
    if (condition()) {
      ifTrue.serviceIdentifier
    } else {
      ifFalse.serviceIdentifier
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD
@ -0,0 +1,19 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    provides = scala_artifact(
        org = "com.twitter.tweetypie",
        name = "context",
        repo = artifactory,
    ),
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "finagle/finagle-core/src/main",
        "graphql/common/src/main/scala/com/twitter/graphql/common/core",
        "src/thrift/com/twitter/context:twitter-context-scala",
        "twitter-context/src/main/scala",
        "util/util-core:scala",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala
@ -0,0 +1,135 @@
 package com.twitter.tweetypie.context
 import com.twitter.context.TwitterContext
 import com.twitter.finagle.Filter
 import com.twitter.finagle.Service
 import com.twitter.finagle.SimpleFilter
 import com.twitter.finagle.context.Contexts
 import com.twitter.io.Buf
 import com.twitter.io.Buf.ByteArray.Owned
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.graphql.common.core.GraphQlClientApplication
 import com.twitter.util.Try
 import java.nio.charset.StandardCharsets.UTF_8
 import scala.util.matching.Regex
 /**
 * Context and filters to help track callers of Tweetypie's endpoints. This context and its
 * filters were originally added to provide visibility into callers of Tweetypie who are
 * using the birdherd library to access tweets.
 *
 * This context data is intended to be marshalled by callers to Tweetypie, but then the
 * context data is stripped (moved from broadcast to local). This happens so that the
 * context data is not forwarded down tweetypie's backend rpc chains, which often result
 * in transitive calls back into tweetypie. This effectively creates single-hop marshalling.
 */
 object TweetypieContext {
  // Bring Tweetypie permitted TwitterContext into scope
  val TwitterContext: TwitterContext =
    com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit)
  case class Ctx(via: String)
  val Empty = Ctx("")
  object Broadcast {
    private[this] object Key extends Contexts.broadcast.Key[Ctx](id = Ctx.getClass.getName) {
      override def marshal(value: Ctx): Buf =
        Owned(value.via.getBytes(UTF_8))
      override def tryUnmarshal(buf: Buf): Try[Ctx] =
        Try(Ctx(new String(Owned.extract(buf), UTF_8)))
    }
    private[TweetypieContext] def current(): Option[Ctx] =
      Contexts.broadcast.get(Key)
    def currentOrElse(default: Ctx): Ctx =
      current().getOrElse(default)
    def letClear[T](f: => T): T =
      Contexts.broadcast.letClear(Key)(f)
    def let[T](ctx: Ctx)(f: => T): T =
      if (Empty == ctx) {
        letClear(f)
      } else {
        Contexts.broadcast.let(Key, ctx)(f)
      }
    // ctx has to be by name so we can re-evaluate it for every request (for usage in ServiceTwitter.scala)
    def filter(ctx: => Ctx): Filter.TypeAgnostic =
      new Filter.TypeAgnostic {
        override def toFilter[Req, Rep]: Filter[Req, Rep, Req, Rep] =
          (request: Req, service: Service[Req, Rep]) => Broadcast.let(ctx)(service(request))
      }
  }
  object Local {
    private[this] val Key =
      new Contexts.local.Key[Ctx]
    private[TweetypieContext] def let[T](ctx: Option[Ctx])(f: => T): T =
      ctx match {
        case Some(ctx) if ctx != Empty => Contexts.local.let(Key, ctx)(f)
        case None => Contexts.local.letClear(Key)(f)
      }
    def current(): Option[Ctx] =
      Contexts.local.get(Key)
    def filter[Req, Rep]: SimpleFilter[Req, Rep] =
      (request: Req, service: Service[Req, Rep]) => {
        val ctx = Broadcast.current()
        Broadcast.letClear(Local.let(ctx)(service(request)))
      }
    private[this] def clientAppIdToName(clientAppId: Long) =
      GraphQlClientApplication.AllById.get(clientAppId).map(_.name).getOrElse("nonTOO")
    private[this] val pathRegexes: Seq[(Regex, String)] = Seq(
      ("timeline_conversation_.*_json".r, "timeline_conversation__slug__json"),
      ("user_timeline_.*_json".r, "user_timeline__user__json"),
      ("[0-9]{2,}".r, "_id_")
    )
    // `context.via` will either be a string like: "birdherd" or "birdherd:/1.1/statuses/show/123.json,
    // depending on whether birdherd code was able to determine the path of the request.
    private[this] def getViaAndPath(via: String): (String, Option[String]) =
      via.split(":", 2) match {
        case Array(via, path) =>
          val sanitizedPath = path
            .replace('/', '_')
            .replace('.', '_')
          // Apply each regex in turn
          val normalizedPath = pathRegexes.foldLeft(sanitizedPath) {
            case (path, (regex, replacement)) => regex.replaceAllIn(path, replacement)
          }
          (via, Some(normalizedPath))
        case Array(via) => (via, None)
      }
    def trackStats[U](scopes: StatsReceiver*): Unit =
      for {
        tweetypieCtx <- TweetypieContext.Local.current()
        (via, pathOpt) = getViaAndPath(tweetypieCtx.via)
        twitterCtx <- TwitterContext()
        clientAppId <- twitterCtx.clientApplicationId
      } yield {
        val clientAppName = clientAppIdToName(clientAppId)
        scopes.foreach { stats =>
          val ctxStats = stats.scope("context")
          val viaStats = ctxStats.scope("via", via)
          viaStats.scope("all").counter("requests").incr()
          val viaClientStats = viaStats.scope("by_client", clientAppName)
          viaClientStats.counter("requests").incr()
          pathOpt.foreach { path =>
            val viaPathStats = viaStats.scope("by_path", path)
            viaPathStats.counter("requests").incr()
          }
        }
      }
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD
@ -0,0 +1,15 @@
 scala_library(
    sources = ["DeciderGates.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/com/google/guava",
        "decider",
        "finagle/finagle-toggle/src/main/scala/com/twitter/finagle/server",
        "tweetypie/servo/decider",
        "tweetypie/servo/util/src/main/scala",
        "util/util-core:scala",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala
@ -0,0 +1,60 @@
 package com.twitter.tweetypie
 package decider
 import com.google.common.hash.Hashing
 import com.twitter.decider.Decider
 import com.twitter.decider.Feature
 import com.twitter.servo.gate.DeciderGate
 import com.twitter.servo.util.Gate
 import java.nio.charset.StandardCharsets
 import scala.collection.mutable
 trait DeciderGates {
  def overrides: Map[String, Boolean] = Map.empty
  def decider: Decider
  def prefix: String
  protected val seenFeatures: mutable.HashSet[String] = new mutable.HashSet[String]
  private def deciderFeature(name: String): Feature = {
    decider.feature(prefix + "_" + name)
  }
  def withOverride[T](name: String, mkGate: Feature => Gate[T]): Gate[T] = {
    seenFeatures += name
    overrides.get(name).map(Gate.const).getOrElse(mkGate(deciderFeature(name)))
  }
  protected def linear(name: String): Gate[Unit] = withOverride[Unit](name, DeciderGate.linear)
  protected def byId(name: String): Gate[Long] = withOverride[Long](name, DeciderGate.byId)
  /**
   * It returns a Gate[String] that can be used to check availability of the feature.
   * The string is hashed into a Long and used as an "id" and then used to call servo's
   * DeciderGate.byId
   *
   * @param name decider name
   * @return Gate[String]
   */
  protected def byStringId(name: String): Gate[String] =
    byId(name).contramap { s: String =>
      Hashing.sipHash24().hashString(s, StandardCharsets.UTF_8).asLong()
    }
  def all: Traversable[String] = seenFeatures
  def unusedOverrides: Set[String] = overrides.keySet.diff(all.toSet)
  /**
   * Generate a map of name -> availability, taking into account overrides.
   * Overrides are either on or off so map to 10000 or 0, respectively.
   */
  def availabilityMap: Map[String, Option[Int]] =
    all.map { name =>
      val availability: Option[Int] = overrides
        .get(name)
        .map(on => if (on) 10000 else 0)
        .orElse(deciderFeature(name).availability)
      name -> availability
    }.toMap
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD
@ -0,0 +1,10 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "decider",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala
@ -0,0 +1,42 @@
 package com.twitter.tweetypie.decider.overrides
 import com.twitter.decider.LocalOverrides
 object TweetyPieDeciderOverrides extends LocalOverrides.Namespace("tweetypie", "tweetypie_") {
  val CheckSpamOnRetweet: LocalOverrides.Override = feature("check_spam_on_retweet")
  val CheckSpamOnTweet: LocalOverrides.Override = feature("check_spam_on_tweet")
  val ConversationControlUseFeatureSwitchResults: LocalOverrides.Override = feature(
    "conversation_control_use_feature_switch_results")
  val ConversationControlTweetCreateEnabled: LocalOverrides.Override = feature(
    "conversation_control_tweet_create_enabled")
  val EnableExclusiveTweetControlValidation: LocalOverrides.Override = feature(
    "enable_exclusive_tweet_control_validation")
  val EnableHotKeyCaches: LocalOverrides.Override = feature("enable_hot_key_caches")
  val HydrateConversationMuted: LocalOverrides.Override = feature("hydrate_conversation_muted")
  val HydrateExtensionsOnWrite: LocalOverrides.Override = feature("hydrate_extensions_on_write")
  val HydrateEscherbirdAnnotations: LocalOverrides.Override = feature(
    "hydrate_escherbird_annotations")
  val HydrateGnipProfileGeoEnrichment: LocalOverrides.Override = feature(
    "hydrate_gnip_profile_geo_enrichment")
  val HydratePastedPics: LocalOverrides.Override = feature("hydrate_pasted_pics")
  val HydratePerspectivesEditsForOtherSafetyLevels: LocalOverrides.Override = feature(
    "hydrate_perspectives_edits_for_other_levels")
  val HydrateScrubEngagements: LocalOverrides.Override = feature("hydrate_scrub_engagements")
  val LogRepoExceptions: LocalOverrides.Override = feature("log_repo_exceptions")
  val MediaRefsHydratorIncludePastedMedia: LocalOverrides.Override = feature(
    "media_refs_hydrator_include_pasted_media")
  val ShortCircuitLikelyPartialTweetReads: LocalOverrides.Override = feature(
    "short_circuit_likely_partial_tweet_reads_ms")
  val RateLimitByLimiterService: LocalOverrides.Override = feature("rate_limit_by_limiter_service")
  val RateLimitTweetCreationFailure: LocalOverrides.Override = feature(
    "rate_limit_tweet_creation_failure")
  val ReplyTweetConversationControlHydrationEnabled = feature(
    "reply_tweet_conversation_control_hydration_enabled"
  )
  val DisableInviteViaMention = feature(
    "disable_invite_via_mention"
  )
  val EnableRemoveUnmentionedImplicitMentions: LocalOverrides.Override = feature(
    "enable_remove_unmentioned_implicit_mentions")
  val useReplicatedDeleteTweet2: LocalOverrides.Override = feature("use_replicated_delete_tweet_2")
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD
@ -0,0 +1,15 @@
 scala_library(
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "finagle/finagle-core/src/main",
        "incentives/jiminy/src/main/thrift/com/twitter/incentives/jiminy:thrift-scala",
        "tweetypie/servo/util/src/main/scala",
        "stitch/stitch-core",
        "strato/src/main/scala/com/twitter/strato/client",
        "tweetypie/server/src/main/scala/com/twitter/tweetypie/core",
        "util/util-core",
        "util/util-stats",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala
@ -0,0 +1,165 @@
 package com.twitter.tweetypie.jiminy.tweetypie
 import com.twitter.finagle.stats.CategorizingExceptionStatsHandler
 import com.twitter.finagle.stats.Stat
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.incentives.jiminy.thriftscala._
 import com.twitter.servo.util.FutureArrow
 import com.twitter.servo.util.Gate
 import com.twitter.stitch.Stitch
 import com.twitter.strato.thrift.ScroogeConvImplicits._
 import com.twitter.strato.client.{Client => StratoClient}
 import com.twitter.tweetypie.core.TweetCreateFailure
 import com.twitter.util.Future
 import com.twitter.util.Return
 import com.twitter.util.Throw
 case class NudgeBuilderRequest(
  text: String,
  inReplyToTweetId: Option[NudgeBuilder.TweetId],
  conversationId: Option[NudgeBuilder.TweetId],
  hasQuotedTweet: Boolean,
  nudgeOptions: Option[CreateTweetNudgeOptions],
  tweetId: Option[NudgeBuilder.TweetId])
 trait NudgeBuilder extends FutureArrow[NudgeBuilderRequest, Unit] {
  /**
   * Check whether the user should receive a nudge instead of creating
   * the Tweet. If nudgeOptions is None, then no nudge check will be
   * performed.
   *
   * @return a Future.exception containing a [[TweetCreateFailure]] if the
   *   user should be nudged, or Future.Unit if the user should not be
   *   nudged.
   */
  def apply(
    request: NudgeBuilderRequest
  ): Future[Unit]
 }
 object NudgeBuilder {
  type Type = FutureArrow[NudgeBuilderRequest, Unit]
  type TweetId = Long
  // darkTrafficCreateNudgeOptions ensure that our dark traffic sends a request that will
  // accurately test the Jiminy backend. in this case, we specify that we want checks for all
  // possible nudge types
  private[this] val darkTrafficCreateNudgeOptions = Some(
    CreateTweetNudgeOptions(
      requestedNudgeTypes = Some(
        Set(
          TweetNudgeType.PotentiallyToxicTweet,
          TweetNudgeType.ReviseOrMute,
          TweetNudgeType.ReviseOrHideThenBlock,
          TweetNudgeType.ReviseOrBlock
        )
      )
    )
  )
  private[this] def mkJiminyRequest(
    request: NudgeBuilderRequest,
    isDarkRequest: Boolean = false
  ): CreateTweetNudgeRequest = {
    val tweetType =
      if (request.inReplyToTweetId.nonEmpty) TweetType.Reply
      else if (request.hasQuotedTweet) TweetType.QuoteTweet
      else TweetType.OriginalTweet
    CreateTweetNudgeRequest(
      tweetText = request.text,
      tweetType = tweetType,
      inReplyToTweetId = request.inReplyToTweetId,
      conversationId = request.conversationId,
      createTweetNudgeOptions =
        if (isDarkRequest) darkTrafficCreateNudgeOptions else request.nudgeOptions,
      tweetId = request.tweetId
    )
  }
  /**
   * NudgeBuilder implemented by calling the strato column `incentives/createNudge`.
   *
   * Stats recorded:
   *   - latency_ms: Latency histogram (also implicitly number of
   *     invocations). This is counted only in the case that a nudge
   *     check was requested (`nudgeOptions` is non-empty)
   *
   *   - nudge: The nudge check succeeded and a nudge was created.
   *
   *   - no_nudge: The nudge check succeeded, but no nudge was created.
   *
   *   - failures: Calling strato to create a nudge failed. Broken out
   *     by exception.
   */
  def apply(
    nudgeArrow: FutureArrow[CreateTweetNudgeRequest, CreateTweetNudgeResponse],
    enableDarkTraffic: Gate[Unit],
    stats: StatsReceiver
  ): NudgeBuilder = {
    new NudgeBuilder {
      private[this] val nudgeLatencyStat = stats.stat("latency_ms")
      private[this] val nudgeCounter = stats.counter("nudge")
      private[this] val noNudgeCounter = stats.counter("no_nudge")
      private[this] val darkRequestCounter = stats.counter("dark_request")
      private[this] val nudgeExceptionHandler = new CategorizingExceptionStatsHandler
      override def apply(
        request: NudgeBuilderRequest
      ): Future[Unit] =
        request.nudgeOptions match {
          case None =>
            if (enableDarkTraffic()) {
              darkRequestCounter.incr()
              Stat
                .timeFuture(nudgeLatencyStat) {
                  nudgeArrow(mkJiminyRequest(request, isDarkRequest = true))
                }
                .transform { _ =>
                  // ignore the response since it is a dark request
                  Future.Done
                }
            } else {
              Future.Done
            }
          case Some(_) =>
            Stat
              .timeFuture(nudgeLatencyStat) {
                nudgeArrow(mkJiminyRequest(request))
              }
              .transform {
                case Throw(e) =>
                  nudgeExceptionHandler.record(stats, e)
                  // If we failed to invoke the nudge column, then
                  // just continue on with the Tweet creation.
                  Future.Done
                case Return(CreateTweetNudgeResponse(Some(nudge))) =>
                  nudgeCounter.incr()
                  Future.exception(TweetCreateFailure.Nudged(nudge = nudge))
                case Return(CreateTweetNudgeResponse(None)) =>
                  noNudgeCounter.incr()
                  Future.Done
              }
        }
    }
  }
  def apply(
    strato: StratoClient,
    enableDarkTraffic: Gate[Unit],
    stats: StatsReceiver
  ): NudgeBuilder = {
    val executer =
      strato.executer[CreateTweetNudgeRequest, CreateTweetNudgeResponse](
        "incentives/createTweetNudge")
    val nudgeArrow: FutureArrow[CreateTweetNudgeRequest, CreateTweetNudgeResponse] = { req =>
      Stitch.run(executer.execute(req))
    }
    apply(nudgeArrow, enableDarkTraffic, stats)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD
@ -0,0 +1,18 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "src/java/com/twitter/common/text/language:language-identifier",
        "src/java/com/twitter/common/text/language:locale-util",
        "src/java/com/twitter/common/text/pipeline",
        "src/java/com/twitter/common/text/token",
        "src/java/com/twitter/common_internal/text",
        "src/java/com/twitter/common_internal/text/version",
        "tweetypie/src/resources/com/twitter/tweetypie/matching",
        "util/util-core/src/main/scala/com/twitter/concurrent",
        "util/util-core/src/main/scala/com/twitter/io",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala
@ -0,0 +1,92 @@
 package com.twitter.tweetypie.matching
 object TokenSequence {
  /**
   * Is `suffix` a suffix of `s`, starting at `offset` in `s`?
   */
  def hasSuffixAt(s: CharSequence, suffix: CharSequence, offset: Int): Boolean =
    if (offset == 0 && (s.eq(suffix) || s == suffix)) {
      true
    } else if (suffix.length != (s.length - offset)) {
      false
    } else {
      @annotation.tailrec
      def go(i: Int): Boolean =
        if (i == suffix.length) true
        else if (suffix.charAt(i) == s.charAt(offset + i)) go(i + 1)
        else false
      go(0)
    }
  /**
   * Do two [[CharSequence]]s contain the same characters?
   *
   * [[CharSequence]] equality is not sufficient because
   * [[CharSequence]]s of different types may not consider other
   * [[CharSequence]]s containing the same characters equivalent.
   */
  def sameCharacters(s1: CharSequence, s2: CharSequence): Boolean =
    hasSuffixAt(s1, s2, 0)
  /**
   * This method implements the product definition of a token matching a
   * keyword. That definition is:
   *
   * - The token contains the same characters as the keyword.
   * - The token contains the same characters as the keyword after
   *   dropping a leading '#' or '@' from the token.
   *
   * The intention is that a keyword matches an identical hashtag, but
   * if the keyword itself is a hashtag, it only matches the hashtag
   * form.
   *
   * The tokenization process should rule out tokens or keywords that
   * start with multiple '#' characters, even though this implementation
   * allows for e.g. token "##a" to match "#a".
   */
  def tokenMatches(token: CharSequence, keyword: CharSequence): Boolean =
    if (sameCharacters(token, keyword)) true
    else if (token.length == 0) false
    else {
      val tokenStart = token.charAt(0)
      (tokenStart == '#' || tokenStart == '@') && hasSuffixAt(token, keyword, 1)
    }
 }
 /**
 * A sequence of normalized tokens. The sequence depends on the locale
 * in which the text was parsed and the version of the penguin library
 * that was used at tokenization time.
 */
 case class TokenSequence private[matching] (toIndexedSeq: IndexedSeq[CharSequence]) {
  import TokenSequence.tokenMatches
  private def apply(i: Int): CharSequence = toIndexedSeq(i)
  def isEmpty: Boolean = toIndexedSeq.isEmpty
  def nonEmpty: Boolean = toIndexedSeq.nonEmpty
  /**
   * Does the supplied sequence of keywords match a consecutive sequence
   * of tokens within this sequence?
   */
  def containsKeywordSequence(keywords: TokenSequence): Boolean = {
    val finalIndex = toIndexedSeq.length - keywords.toIndexedSeq.length
    @annotation.tailrec
    def matchesAt(offset: Int, i: Int): Boolean =
      if (i >= keywords.toIndexedSeq.length) true
      else if (tokenMatches(this(i + offset), keywords(i))) matchesAt(offset, i + 1)
      else false
    @annotation.tailrec
    def search(offset: Int): Boolean =
      if (offset > finalIndex) false
      else if (matchesAt(offset, 0)) true
      else search(offset + 1)
    search(0)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala
@ -0,0 +1,156 @@
 package com.twitter.tweetypie.matching
 import com.twitter.common.text.language.LocaleUtil
 import com.twitter.common_internal.text.pipeline.TwitterTextNormalizer
 import com.twitter.common_internal.text.pipeline.TwitterTextTokenizer
 import com.twitter.common_internal.text.version.PenguinVersion
 import com.twitter.concurrent.Once
 import com.twitter.io.StreamIO
 import java.util.Locale
 import scala.collection.JavaConverters._
 /**
 * Extract a sequence of normalized tokens from the input text. The
 * normalization and tokenization are properly configured for keyword
 * matching between texts.
 */
 trait Tokenizer {
  def tokenize(input: String): TokenSequence
 }
 object Tokenizer {
  /**
   * When a Penguin version is not explicitly specified, use this
   * version of Penguin to perform normalization and tokenization. If
   * you cache tokenized text, be sure to store the version as well, to
   * avoid comparing text that was normalized with different algorithms.
   */
  val DefaultPenguinVersion: PenguinVersion = PenguinVersion.PENGUIN_6
  /**
   * If you already know the locale of the text that is being tokenized,
   * use this method to get a tokenizer that is much more efficient than
   * the Tweet or Query tokenizer, since it does not have to perform
   * language detection.
   */
  def forLocale(locale: Locale): Tokenizer = get(locale, DefaultPenguinVersion)
  /**
   * Obtain a `Tokenizer` that will tokenize the text for the given
   * locale and version of the Penguin library.
   */
  def get(locale: Locale, version: PenguinVersion): Tokenizer =
    TokenizerFactories(version).forLocale(locale)
  /**
   * Encapsulates the configuration and use of [[TwitterTextTokenizer]]
   * and [[TwitterTextNormalizer]].
   */
  private[this] class TokenizerFactory(version: PenguinVersion) {
    // The normalizer is thread-safe, so share one instance.
    private[this] val normalizer =
      (new TwitterTextNormalizer.Builder(version)).build()
    // The TwitterTextTokenizer is relatively expensive to build,
    // and is not thread safe, so keep instances of it in a
    // ThreadLocal.
    private[this] val local =
      new ThreadLocal[TwitterTextTokenizer] {
        override def initialValue: TwitterTextTokenizer =
          (new TwitterTextTokenizer.Builder(version)).build()
      }
    /**
     * Obtain a [[Tokenizer]] for this combination of [[PenguinVersion]]
     * and [[Locale]].
     */
    def forLocale(locale: Locale): Tokenizer =
      new Tokenizer {
        override def tokenize(input: String): TokenSequence = {
          val stream = local.get.getTwitterTokenStreamFor(locale)
          stream.reset(normalizer.normalize(input, locale))
          val builder = IndexedSeq.newBuilder[CharSequence]
          while (stream.incrementToken) builder += stream.term()
          TokenSequence(builder.result())
        }
      }
  }
  /**
   * Since there are a small number of Penguin versions, eagerly
   * initialize a TokenizerFactory for each version, to avoid managing
   * mutable state.
   */
  private[this] val TokenizerFactories: PenguinVersion => TokenizerFactory =
    PenguinVersion.values.map(v => v -> new TokenizerFactory(v)).toMap
  /**
   * The set of locales used in warmup. These locales are mentioned in
   * the logic of TwitterTextTokenizer and TwitterTextNormalizer.
   */
  private[this] val WarmUpLocales: Seq[Locale] =
    Seq
      .concat(
        Seq(
          Locale.JAPANESE,
          Locale.KOREAN,
          LocaleUtil.UNKNOWN,
          LocaleUtil.THAI,
          LocaleUtil.ARABIC,
          LocaleUtil.SWEDISH
        ),
        LocaleUtil.CHINESE_JAPANESE_LOCALES.asScala,
        LocaleUtil.CJK_LOCALES.asScala
      )
      .toSet
      .toArray
      .toSeq
  /**
   * Load the default inputs that are used for warming up this library.
   */
  def warmUpCorpus(): Seq[String] = {
    val stream = getClass.getResourceAsStream("warmup-text.txt")
    val bytes =
      try StreamIO.buffer(stream)
      finally stream.close()
    bytes.toString("UTF-8").linesIterator.toArray.toSeq
  }
  /**
   * Exercise the functionality of this library on the specified
   * strings. In general, prefer [[warmUp]] to this method.
   */
  def warmUpWith(ver: PenguinVersion, texts: Iterable[String]): Unit =
    texts.foreach { txt =>
      // Exercise each locale
      WarmUpLocales.foreach { loc =>
        Tokenizer.get(loc, ver).tokenize(txt)
        UserMutes.builder().withPenguinVersion(ver).withLocale(loc).validate(txt)
      }
      // Exercise language detection
      TweetTokenizer.get(ver).tokenize(txt)
      UserMutes.builder().withPenguinVersion(ver).validate(txt)
    }
  private[this] val warmUpOnce = Once(warmUpWith(DefaultPenguinVersion, warmUpCorpus()))
  /**
   * The creation of the first TwitterTextTokenizer is relatively
   * expensive, and tokenizing some texts may cause significant
   * initialization.
   *
   * This method exercises the functionality of this library
   * with a range of texts in order to perform as much initialization as
   * possible before the library is used in a latency-sensitive way.
   *
   * The warmup routine will only run once. Subsequent invocations of
   * `warmUp` will no do additional work, and will return once warmup is
   * complete.
   *
   * The warmup will take on the order of seconds.
   */
  def warmUp(): Unit = warmUpOnce()
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala
@ -0,0 +1,45 @@
 package com.twitter.tweetypie.matching
 import com.twitter.common.text.pipeline.TwitterLanguageIdentifier
 import com.twitter.common_internal.text.version.PenguinVersion
 import java.util.Locale
 object TweetTokenizer extends Tokenizer {
  type LocalePicking = Option[Locale] => Tokenizer
  /**
   * Get a Tokenizer-producing function that uses the supplied locale
   * to select an appropriate Tokenizer.
   */
  def localePicking: LocalePicking = {
    case None => TweetTokenizer
    case Some(locale) => Tokenizer.forLocale(locale)
  }
  private[this] val tweetLangIdentifier =
    (new TwitterLanguageIdentifier.Builder).buildForTweet()
  /**
   * Get a Tokenizer that performs Tweet language detection, and uses
   * that result to tokenize the text. If you already know the locale of
   * the tweet text, use `Tokenizer.get`, because it's much
   * cheaper.
   */
  def get(version: PenguinVersion): Tokenizer =
    new Tokenizer {
      override def tokenize(text: String): TokenSequence = {
        val locale = tweetLangIdentifier.identify(text).getLocale
        Tokenizer.get(locale, version).tokenize(text)
      }
    }
  private[this] val Default = get(Tokenizer.DefaultPenguinVersion)
  /**
   * Tokenize the given text using Tweet language detection and
   * `Tokenizer.DefaultPenguinVersion`. Prefer `Tokenizer.forLocale` if
   * you already know the language of the text.
   */
  override def tokenize(tweetText: String): TokenSequence =
    Default.tokenize(tweetText)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala
@ -0,0 +1,128 @@
 package com.twitter.tweetypie.matching
 import com.twitter.common.text.pipeline.TwitterLanguageIdentifier
 import com.twitter.common_internal.text.version.PenguinVersion
 import java.util.Locale
 import scala.collection.JavaConversions.asScalaBuffer
 object UserMutesBuilder {
  private[matching] val Default =
    new UserMutesBuilder(Tokenizer.DefaultPenguinVersion, None)
  private val queryLangIdentifier =
    (new TwitterLanguageIdentifier.Builder).buildForQuery()
 }
 class UserMutesBuilder private (penguinVersion: PenguinVersion, localeOpt: Option[Locale]) {
  /**
   * Use the specified Penguin version when tokenizing a keyword mute
   * string. In general, use the default version, unless you need to
   * specify a particular version for compatibility with another system
   * that is using that version.
   */
  def withPenguinVersion(ver: PenguinVersion): UserMutesBuilder =
    if (ver == penguinVersion) this
    else new UserMutesBuilder(ver, localeOpt)
  /**
   * Use the specified locale when tokenizing a keyword mute string.
   */
  def withLocale(locale: Locale): UserMutesBuilder =
    if (localeOpt.contains(locale)) this
    else new UserMutesBuilder(penguinVersion, Some(locale))
  /**
   * When tokenizing a user mute list, detect the language of the
   * text. This is significantly more expensive than using a predefined
   * locale, but is appropriate when the locale is not yet known.
   */
  def detectLocale(): UserMutesBuilder =
    if (localeOpt.isEmpty) this
    else new UserMutesBuilder(penguinVersion, localeOpt)
  private[this] lazy val tokenizer =
    localeOpt match {
      case None =>
        // No locale was specified, so use a Tokenizer that performs
        // language detection before tokenizing.
        new Tokenizer {
          override def tokenize(text: String): TokenSequence = {
            val locale = UserMutesBuilder.queryLangIdentifier.identify(text).getLocale
            Tokenizer.get(locale, penguinVersion).tokenize(text)
          }
        }
      case Some(locale) =>
        Tokenizer.get(locale, penguinVersion)
    }
  /**
   * Given a list of the user's raw keyword mutes, return a preprocessed
   * set of mutes suitable for matching against tweet text. If the input
   * contains any phrases that fail validation, then they will be
   * dropped.
   */
  def build(rawInput: Seq[String]): UserMutes =
    UserMutes(rawInput.flatMap(validate(_).right.toOption))
  /**
   * Java-friendly API for processing a user's list of raw keyword mutes
   * into a preprocessed form suitable for matching against text.
   */
  def fromJavaList(rawInput: java.util.List[String]): UserMutes =
    build(asScalaBuffer(rawInput).toSeq)
  /**
   * Validate the raw user input muted phrase. Currently, the only
   * inputs that are not valid for keyword muting are those inputs that
   * do not contain any keywords, because those inputs would match all
   * tweets.
   */
  def validate(mutedPhrase: String): Either[UserMutes.ValidationError, TokenSequence] = {
    val keywords = tokenizer.tokenize(mutedPhrase)
    if (keywords.isEmpty) UserMutes.EmptyPhraseError else Right(keywords)
  }
 }
 object UserMutes {
  sealed trait ValidationError
  /**
   * The phrase's tokenization did not produce any tokens
   */
  case object EmptyPhrase extends ValidationError
  private[matching] val EmptyPhraseError = Left(EmptyPhrase)
  /**
   * Get a [[UserMutesBuilder]] that uses the default Penguin version and
   * performs language identification to choose a locale.
   */
  def builder(): UserMutesBuilder = UserMutesBuilder.Default
 }
 /**
 * A user's muted keyword list, preprocessed into token sequences.
 */
 case class UserMutes private[matching] (toSeq: Seq[TokenSequence]) {
  /**
   * Do any of the users' muted keyword sequences occur within the
   * supplied text?
   */
  def matches(text: TokenSequence): Boolean =
    toSeq.exists(text.containsKeywordSequence)
  /**
   * Find all positions of matching muted keyword from the user's
   * muted keyword list
   */
  def find(text: TokenSequence): Seq[Int] =
    toSeq.zipWithIndex.collect {
      case (token, index) if text.containsKeywordSequence(token) => index
    }
  def isEmpty: Boolean = toSeq.isEmpty
  def nonEmpty: Boolean = toSeq.nonEmpty
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD
@ -0,0 +1,17 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "mediaservices/commons/src/main/thrift:thrift-scala",
        "scrooge/scrooge-core/src/main/scala",
        "tweetypie/servo/util/src/main/scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
        "tco-util",
        "tweetypie/common/src/scala/com/twitter/tweetypie/util",
        "util/util-logging/src/main/scala/com/twitter/logging",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala
@ -0,0 +1,149 @@
 package com.twitter.tweetypie
 package media
 import com.twitter.mediaservices.commons.thriftscala.MediaCategory
 import com.twitter.mediaservices.commons.tweetmedia.thriftscala._
 import com.twitter.tco_util.TcoSlug
 import com.twitter.tweetypie.thriftscala._
 import com.twitter.tweetypie.util.TweetLenses
 /**
 * A smörgåsbord of media-related helper methods.
 */
 object Media {
  val AnimatedGifContentType = "video/mp4 codecs=avc1.42E0"
  case class MediaTco(expandedUrl: String, url: String, displayUrl: String)
  val ImageContentTypes: Set[MediaContentType] =
    Set[MediaContentType](
      MediaContentType.ImageJpeg,
      MediaContentType.ImagePng,
      MediaContentType.ImageGif
    )
  val AnimatedGifContentTypes: Set[MediaContentType] =
    Set[MediaContentType](
      MediaContentType.VideoMp4
    )
  val VideoContentTypes: Set[MediaContentType] =
    Set[MediaContentType](
      MediaContentType.VideoGeneric
    )
  val InUseContentTypes: Set[MediaContentType] =
    Set[MediaContentType](
      MediaContentType.ImageGif,
      MediaContentType.ImageJpeg,
      MediaContentType.ImagePng,
      MediaContentType.VideoMp4,
      MediaContentType.VideoGeneric
    )
  def isImage(contentType: MediaContentType): Boolean =
    ImageContentTypes.contains(contentType)
  def contentTypeToString(contentType: MediaContentType): String =
    contentType match {
      case MediaContentType.ImageGif => "image/gif"
      case MediaContentType.ImageJpeg => "image/jpeg"
      case MediaContentType.ImagePng => "image/png"
      case MediaContentType.VideoMp4 => "video/mp4"
      case MediaContentType.VideoGeneric => "video"
      case _ => throw new IllegalArgumentException(s"UnknownMediaContentType: $contentType")
    }
  def stringToContentType(str: String): MediaContentType =
    str match {
      case "image/gif" => MediaContentType.ImageGif
      case "image/jpeg" => MediaContentType.ImageJpeg
      case "image/png" => MediaContentType.ImagePng
      case "video/mp4" => MediaContentType.VideoMp4
      case "video" => MediaContentType.VideoGeneric
      case _ => throw new IllegalArgumentException(s"Unknown Content Type String: $str")
    }
  def extensionForContentType(cType: MediaContentType): String =
    cType match {
      case MediaContentType.ImageJpeg => "jpg"
      case MediaContentType.ImagePng => "png"
      case MediaContentType.ImageGif => "gif"
      case MediaContentType.VideoMp4 => "mp4"
      case MediaContentType.VideoGeneric => ""
      case _ => "unknown"
    }
  /**
   * Extract a URL entity from a media entity.
   */
  def extractUrlEntity(mediaEntity: MediaEntity): UrlEntity =
    UrlEntity(
      fromIndex = mediaEntity.fromIndex,
      toIndex = mediaEntity.toIndex,
      url = mediaEntity.url,
      expanded = Some(mediaEntity.expandedUrl),
      display = Some(mediaEntity.displayUrl)
    )
  /**
   * Copy the fields from the URL entity into the media entity.
   */
  def copyFromUrlEntity(mediaEntity: MediaEntity, urlEntity: UrlEntity): MediaEntity = {
    val expandedUrl =
      urlEntity.expanded.orElse(Option(mediaEntity.expandedUrl)).getOrElse(urlEntity.url)
    val displayUrl =
      urlEntity.url match {
        case TcoSlug(slug) => MediaUrl.Display.fromTcoSlug(slug)
        case _ => urlEntity.expanded.getOrElse(urlEntity.url)
      }
    mediaEntity.copy(
      fromIndex = urlEntity.fromIndex,
      toIndex = urlEntity.toIndex,
      url = urlEntity.url,
      expandedUrl = expandedUrl,
      displayUrl = displayUrl
    )
  }
  def getAspectRatio(size: MediaSize): AspectRatio =
    getAspectRatio(size.width, size.height)
  def getAspectRatio(width: Int, height: Int): AspectRatio = {
    if (width == 0 || height == 0) {
      throw new IllegalArgumentException(s"Dimensions must be non zero: ($width, $height)")
    }
    def calculateGcd(a: Int, b: Int): Int =
      if (b == 0) a else calculateGcd(b, a % b)
    val gcd = calculateGcd(math.max(width, height), math.min(width, height))
    AspectRatio((width / gcd).toShort, (height / gcd).toShort)
  }
  /**
   * Return just the media that belongs to this tweet
   */
  def ownMedia(tweet: Tweet): Seq[MediaEntity] =
    TweetLenses.media.get(tweet).filter(isOwnMedia(tweet.id, _))
  /**
   * Does the given media entity, which is was found on the tweet with the specified
   * tweetId, belong to that tweet?
   */
  def isOwnMedia(tweetId: TweetId, entity: MediaEntity): Boolean =
    entity.sourceStatusId.forall(_ == tweetId)
  /**
   * Mixed Media is any case where there is more than one media item & any of them is not an image.
   */
  def isMixedMedia(mediaEntities: Seq[MediaEntity]): Boolean =
    mediaEntities.length > 1 && (mediaEntities.flatMap(_.mediaInfo).exists {
      case _: MediaInfo.ImageInfo => false
      case _ => true
    } ||
      mediaEntities.flatMap(_.mediaKey).map(_.mediaCategory).exists(_ != MediaCategory.TweetImage))
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala
@ -0,0 +1,108 @@
 package com.twitter.tweetypie
 package media
 import com.twitter.logging.Logger
 import com.twitter.tweetypie.thriftscala.MediaEntity
 import com.twitter.tweetypie.thriftscala.UrlEntity
 /**
 * Creating and parsing tweet media entity URLs.
 *
 * There are four kinds of URL in a media entity:
 *
 *   - Display URLs: pic.twitter.com aliases for the short URL, for
 *     embedding in the tweet text.
 *
 *   - Short URLs: regular t.co URLs that expand to the permalink URL.
 *
 *   - Permalink URLs: link to a page that displays the media after
 *     doing authorization
 *
 *   - Asset URLs: links to the actual media asset.
 *
 */
 object MediaUrl {
  private[this] val log = Logger(getClass)
  /**
   * The URL that should be filled in to the displayUrl field of the
   * media entity. This URL behaves exactly the same as a t.co link
   * (only the domain is different.)
   */
  object Display {
    val Root = "pic.twitter.com/"
    def fromTcoSlug(tcoSlug: String): String = Root + tcoSlug
  }
  /**
   * The link target for the link in the tweet text (the expanded URL
   * for the media, copied from the URL entity.) For native photos,
   * this is the tweet permalink page.
   *
   * For users without a screen name ("handleless" or NoScreenName users)
   * a permalink to /i/status/:tweet_id is used.
   */
  object Permalink {
    val Root = "https://twitter.com/"
    val Internal = "i"
    val PhotoSuffix = "/photo/1"
    val VideoSuffix = "/video/1"
    def apply(screenName: String, tweetId: TweetId, isVideo: Boolean): String =
      Root +
        (if (screenName.isEmpty) Internal else screenName) +
        "/status/" +
        tweetId +
        (if (isVideo) VideoSuffix else PhotoSuffix)
    private[this] val PermalinkRegex =
      """https?://twitter.com/(?:#!/)?\w+/status/(\d+)/(?:photo|video)/\d+""".r
    private[this] def getTweetId(permalink: String): Option[TweetId] =
      permalink match {
        case PermalinkRegex(tweetIdStr) =>
          try {
            Some(tweetIdStr.toLong)
          } catch {
            // Digits too big to fit in a Long
            case _: NumberFormatException => None
          }
        case _ => None
      }
    def getTweetId(urlEntity: UrlEntity): Option[TweetId] =
      urlEntity.expanded.flatMap(getTweetId)
    def hasTweetId(permalink: String, tweetId: TweetId): Boolean =
      getTweetId(permalink).contains(tweetId)
    def hasTweetId(mediaEntity: MediaEntity, tweetId: TweetId): Boolean =
      hasTweetId(mediaEntity.expandedUrl, tweetId)
    def hasTweetId(urlEntity: UrlEntity, tweetId: TweetId): Boolean =
      getTweetId(urlEntity).contains(tweetId)
  }
  /**
   * Converts a url that starts with "https://" to one that starts with "http://".
   */
  def httpsToHttp(url: String): String =
    url.replace("https://", "http://")
  /**
   * Gets the last path element from an asset url.  This exists temporarily to support
   * the now deprecated mediaPath element in MediaEntity.
   */
  def mediaPathFromUrl(url: String): String =
    url.lastIndexOf('/') match {
      case -1 =>
        log.error("Invalid media path. Could not find last element: " + url)
        // Better to return a broken preview URL to the client
        // than to fail the whole request.
        ""
      case idx =>
        url.substring(idx + 1)
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala
@ -0,0 +1,7 @@
 package com.twitter.tweetypie
 package object media {
  type TweetId = Long
  type UserId = Long
  type MediaId = Long
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala
@ -0,0 +1,80 @@
 package com.twitter.tweetypie.storage
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.stitch.Stitch
 import com.twitter.storage.client.manhattan.kv.ManhattanValue
 import com.twitter.tweetypie.storage.TweetUtils.collectWithRateLimitCheck
 import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
 import com.twitter.tweetypie.thriftscala.Tweet
 import com.twitter.util.Time
 object AddTweetHandler {
  private[storage] type InternalAddTweet = (
    Tweet,
    ManhattanOperations.Insert,
    Scribe,
    StatsReceiver,
    Time
  ) => Stitch[Unit]
  def apply(
    insert: ManhattanOperations.Insert,
    scribe: Scribe,
    stats: StatsReceiver
  ): TweetStorageClient.AddTweet =
    tweet => doAddTweet(tweet, insert, scribe, stats, Time.now)
  def makeRecords(
    storedTweet: StoredTweet,
    timestamp: Time
  ): Seq[TweetManhattanRecord] = {
    val core = CoreFieldsCodec.fromTweet(storedTweet)
    val packedCoreFieldsBlob = CoreFieldsCodec.toTFieldBlob(core)
    val coreRecord =
      TweetManhattanRecord(
        TweetKey.coreFieldsKey(storedTweet.id),
        ManhattanValue(TFieldBlobCodec.toByteBuffer(packedCoreFieldsBlob), Some(timestamp))
      )
    val otherFieldIds =
      TweetFields.nonCoreInternalFields ++ TweetFields.getAdditionalFieldIds(storedTweet)
    val otherFields =
      storedTweet
        .getFieldBlobs(otherFieldIds)
        .map {
          case (fieldId, tFieldBlob) =>
            TweetManhattanRecord(
              TweetKey.fieldKey(storedTweet.id, fieldId),
              ManhattanValue(TFieldBlobCodec.toByteBuffer(tFieldBlob), Some(timestamp))
            )
        }
        .toSeq
    otherFields :+ coreRecord
  }
  private[storage] val doAddTweet: InternalAddTweet = (
    tweet: Tweet,
    insert: ManhattanOperations.Insert,
    scribe: Scribe,
    stats: StatsReceiver,
    timestamp: Time
  ) => {
    assert(tweet.coreData.isDefined, s"Tweet ${tweet.id} is missing coreData: $tweet")
    val storedTweet = StorageConversions.toStoredTweet(tweet)
    val records = makeRecords(storedTweet, timestamp)
    val inserts = records.map(insert)
    val insertsWithRateLimitCheck =
      Stitch.collect(inserts.map(_.liftToTry)).map(collectWithRateLimitCheck).lowerFromTry
    Stats.updatePerFieldQpsCounters(
      "addTweet",
      TweetFields.getAdditionalFieldIds(storedTweet),
      1,
      stats
    )
    insertsWithRateLimitCheck.unit.onSuccess { _ => scribe.logAdded(storedTweet) }
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD
@ -0,0 +1,47 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    strict_deps = True,
    tags = [
        "bazel-compatible",
        "bazel-incompatible-scaladoc",
    ],
    dependencies = [
        "3rdparty/jvm/com/chuusai:shapeless",
        "3rdparty/jvm/com/fasterxml/jackson/core:jackson-databind",
        "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala",
        "3rdparty/jvm/com/google/guava",
        "3rdparty/jvm/com/twitter/bijection:core",
        "3rdparty/jvm/com/twitter/bijection:scrooge",
        "3rdparty/jvm/com/twitter/bijection:thrift",
        "3rdparty/jvm/commons-codec",
        "3rdparty/jvm/org/apache/thrift:libthrift",
        "diffshow",
        "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization",
        "finagle/finagle-core/src/main",
        "finagle/finagle-stats",
        "finagle/finagle-thriftmux/src/main/scala",
        "mediaservices/commons/src/main/thrift:thrift-scala",
        "scrooge/scrooge-serializer/src/main/scala",
        "tweetypie/servo/repo/src/main/scala",
        "tweetypie/servo/util",
        "snowflake:id",
        "src/thrift/com/twitter/escherbird:media-annotation-structs-scala",
        "src/thrift/com/twitter/manhattan:internal-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
        "stitch/stitch-core",
        "storage/clients/manhattan/client/src/main/scala",
        "tbird-thrift:scala",
        "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields",
        "tweetypie/common/src/scala/com/twitter/tweetypie/client_id",
        "tweetypie/common/src/scala/com/twitter/tweetypie/util",
        "tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal:storage_internal-scala",
        "util-internal/scribe",
        "util/util-core:scala",
        "util/util-slf4j-api/src/main/scala/com/twitter/util/logging",
        "util/util-stats/src/main/scala",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala
@ -0,0 +1,20 @@
 package com.twitter.tweetypie.storage
 import com.twitter.util.Time
 object BounceDeleteHandler {
  def apply(
    insert: ManhattanOperations.Insert,
    scribe: Scribe
  ): TweetStorageClient.BounceDelete =
    tweetId => {
      val mhTimestamp = Time.now
      val bounceDeleteRecord = TweetStateRecord
        .BounceDeleted(tweetId, mhTimestamp.inMillis)
        .toTweetMhRecord
      insert(bounceDeleteRecord).onSuccess { _ =>
        scribe.logRemoved(tweetId, mhTimestamp, isSoftDeleted = true)
      }
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala
@ -0,0 +1,242 @@
 package com.twitter.tweetypie.storage
 import com.twitter.bijection.Conversion.asMethod
 import com.twitter.bijection.Injection
 import com.twitter.scrooge.TFieldBlob
 import com.twitter.storage.client.manhattan.kv._
 import com.twitter.tweetypie.storage.Response.FieldResponse
 import com.twitter.tweetypie.storage.Response.FieldResponseCode
 import com.twitter.tweetypie.storage_internal.thriftscala.CoreFields
 import com.twitter.tweetypie.storage_internal.thriftscala.InternalTweet
 import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
 import java.io.ByteArrayOutputStream
 import java.nio.ByteBuffer
 import org.apache.thrift.protocol.TBinaryProtocol
 import org.apache.thrift.transport.TIOStreamTransport
 import org.apache.thrift.transport.TMemoryInputTransport
 import scala.collection.immutable
 import scala.util.control.NoStackTrace
 // NOTE: All field ids and Tweet structure in this file correspond to the StoredTweet struct ONLY
 object ByteArrayCodec {
  def toByteBuffer(byteArray: Array[Byte]): ByteBuffer = byteArray.as[ByteBuffer]
  def fromByteBuffer(buffer: ByteBuffer): Array[Byte] = buffer.as[Array[Byte]]
 }
 object StringCodec {
  private val string2ByteBuffer = Injection.connect[String, Array[Byte], ByteBuffer]
  def toByteBuffer(strValue: String): ByteBuffer = string2ByteBuffer(strValue)
  def fromByteBuffer(buffer: ByteBuffer): String = string2ByteBuffer.invert(buffer).get
 }
 /**
 * Terminology
 * -----------
 * Tweet id field             : The field number of 'tweetId' in the 'Tweet' thrift structure (i.e "1")
 *
 * First AdditionalField id   : The ID if the first additional field in 'Tweet' thrift structure. All field Ids less than this are
 *                              considered internal and all the ids greater than or equal to this field id are considered 'Additional fields'.
 *                              This is set to 100.
 *
 * Internal Fields            : Fields with ids [1 to firstAdditionalFieldid) (excluding firstAdditionalFieldId)
 *
 * Core fields                : (Subset of Internal fields)- Fields with ids [1 to 8, 19]. These fields are "packed" together and stored
 *                              under a single key. This key is referred to as "CoreFieldsKey" (see @TweetKeyType.CoreFieldsKey).
 *                              Note: Actually field 1 is skipped when packing as this field is the tweet id and it need not be
 *                              explicitly stored since the pkey already contains the tweet Id)
 *
 * Root Core field id         : The field id under which the packed core fields are stored in Manhattan. (This is field Id "1")
 *
 * Required fields            : (Subset of Core fields) - Fields with ids [1 to 5] that MUST be present on every tweet.
 *
 * Additional Fields          : All fields with field ids >= 'firstAdditionalFieldId'
 *
 * Compiled Additional fields : (Subset of Additional Fields) - All fields that the storage library knows about
 *                              (i.e present on the latest storage_internal.thrift that is compiled-in).
 *
 * Passthrough fields         : (Subset of Additional Fields) - The fields on storage_internal.thrift that the storage library is NOT aware of
 *                              These field ids are is obtained looking at the "_passThroughFields" member of the scrooge-generated
 *                             'Tweet' object.
 *
 * coreFieldsIdInInternalTweet: This is the field id of the core fields (the only field) in the Internal Tweet struct
 */
 object TweetFields {
  val firstAdditionalFieldId: Short = 100
  val tweetIdField: Short = 1
  val geoFieldId: Short = 9
  // The field under which all the core field values are stored (in serialized form).
  val rootCoreFieldId: Short = 1
  val coreFieldIds: immutable.IndexedSeq[FieldId] = {
    val quotedTweetFieldId: Short = 19
    (1 to 8).map(_.toShort) ++ Seq(quotedTweetFieldId)
  }
  val requiredFieldIds: immutable.IndexedSeq[FieldId] = (1 to 5).map(_.toShort)
  val coreFieldsIdInInternalTweet: Short = 1
  val compiledAdditionalFieldIds: Seq[FieldId] =
    StoredTweet.metaData.fields.filter(_.id >= firstAdditionalFieldId).map(_.id)
  val internalFieldIds: Seq[FieldId] =
    StoredTweet.metaData.fields.filter(_.id < firstAdditionalFieldId).map(_.id)
  val nonCoreInternalFields: Seq[FieldId] =
    (internalFieldIds.toSet -- coreFieldIds.toSet).toSeq
  def getAdditionalFieldIds(tweet: StoredTweet): Seq[FieldId] =
    compiledAdditionalFieldIds ++ tweet._passthroughFields.keys.toSeq
 }
 /**
 * Helper object to convert TFieldBlob to ByteBuffer that gets stored in Manhattan.
 *
 * The following is the format in which the TFieldBlob gets stored:
 *    [Version][TField][TFieldBlob]
 */
 object TFieldBlobCodec {
  val BinaryProtocolFactory: TBinaryProtocol.Factory = new TBinaryProtocol.Factory()
  val FormatVersion = 1.0
  def toByteBuffer(tFieldBlob: TFieldBlob): ByteBuffer = {
    val baos = new ByteArrayOutputStream()
    val prot = BinaryProtocolFactory.getProtocol(new TIOStreamTransport(baos))
    prot.writeDouble(FormatVersion)
    prot.writeFieldBegin(tFieldBlob.field)
    prot.writeBinary(ByteArrayCodec.toByteBuffer(tFieldBlob.data))
    ByteArrayCodec.toByteBuffer(baos.toByteArray)
  }
  def fromByteBuffer(buffer: ByteBuffer): TFieldBlob = {
    val byteArray = ByteArrayCodec.fromByteBuffer(buffer)
    val prot = BinaryProtocolFactory.getProtocol(new TMemoryInputTransport(byteArray))
    val version = prot.readDouble()
    if (version != FormatVersion) {
      throw new VersionMismatchError(
        "Version mismatch in decoding ByteBuffer to TFieldBlob. " +
          "Actual version: " + version + ". Expected version: " + FormatVersion
      )
    }
    val tField = prot.readFieldBegin()
    val dataBuffer = prot.readBinary()
    val data = ByteArrayCodec.fromByteBuffer(dataBuffer)
    TFieldBlob(tField, data)
  }
 }
 /**
 * Helper object to help convert 'CoreFields' object to/from TFieldBlob (and also to construct
 * 'CoreFields' object from a 'StoredTweet' object)
 *
 * More details:
 * - A subset of fields on the 'StoredTweet' thrift structure (2-8,19) are 'packaged' and stored
 *   together as a serialized TFieldBlob object under a single key in Manhattan (see TweetKeyCodec
 *   helper object above for more details).
 *
 * - To make the packing/unpacking the fields to/from TFieldBlob object, we created the following
 *   two helper thrift structures 'CoreFields' and 'InternalTweet'
 *
 *       // The field Ids and types here MUST exactly match field Ids on 'StoredTweet' thrift structure.
 *       struct CoreFields {
 *          2: optional i64 user_id
 *          ...
 *          8: optional i64 contributor_id
 *          ...
 *          19: optional StoredQuotedTweet stored_quoted_tweet
 *
 *       }
 *
 *       // The field id of core fields MUST be "1"
 *       struct InternalTweet {
 *         1: CoreFields coreFields
 *       }
 *
 * - Given the above two structures, packing/unpacking fields (2-8,19) on StoredTweet object into a TFieldBlob
 *   becomes very trivial:
 *     For packing:
 *       (i) Copy fields (2-8,19) from StoredTweet object to a new CoreFields object
 *      (ii) Create a new InternalTweet object with the 'CoreFields' object constructed in step (i) above
 *     (iii) Extract field "1" as a TFieldBlob from InternalField (by calling the scrooge generated "getFieldBlob(1)"
 *           function on the InternalField objecton
 *
 *     For unpacking:
 *       (i) Create an empty 'InternalField' object
 *      (ii) Call scrooge-generated 'setField' by passing the tFieldBlob blob (created by packing steps above)
 *     (iii) Doing step (ii) above will create a hydrated 'CoreField' object that can be accessed by 'coreFields'
 *           member of 'InternalTweet' object.
 */
 object CoreFieldsCodec {
  val coreFieldIds: Seq[FieldId] = CoreFields.metaData.fields.map(_.id)
  // "Pack" the core fields i.e converts 'CoreFields' object to "packed" tFieldBlob (See description
  // above for more details)
  def toTFieldBlob(coreFields: CoreFields): TFieldBlob = {
    InternalTweet(Some(coreFields)).getFieldBlob(TweetFields.coreFieldsIdInInternalTweet).get
  }
  // "Unpack" the core fields from a packed TFieldBlob into a CoreFields object (see description above for
  // more details)
  def fromTFieldBlob(tFieldBlob: TFieldBlob): CoreFields = {
    InternalTweet().setField(tFieldBlob).coreFields.get
  }
  // "Unpack" the core fields from a packed TFieldBlob into a Map of core-fieldId-> TFieldBlob
  def unpackFields(tFieldBlob: TFieldBlob): Map[Short, TFieldBlob] =
    fromTFieldBlob(tFieldBlob).getFieldBlobs(coreFieldIds)
  // Create a 'CoreFields' thrift object from 'Tweet' thrift object.
  def fromTweet(tweet: StoredTweet): CoreFields = {
    // As mentioned above, the field ids and types on the 'CoreFields' struct exactly match the
    // corresponding fields on StoredTweet structure. So it is safe to call .getField() on Tweet object and
    // and pass the returned tFleldBlob a 'setField' on 'CoreFields' object.
    coreFieldIds.foldLeft(CoreFields()) {
      case (core, fieldId) =>
        tweet.getFieldBlob(fieldId) match {
          case None => core
          case Some(tFieldBlob) => core.setField(tFieldBlob)
        }
    }
  }
 }
 /**
 * Helper object to convert ManhattanException to FieldResponseCode thrift object
 */
 object FieldResponseCodeCodec {
  import FieldResponseCodec.ValueNotFoundException
  def fromManhattanException(mhException: ManhattanException): FieldResponseCode = {
    mhException match {
      case _: ValueNotFoundException => FieldResponseCode.ValueNotFound
      case _: InternalErrorManhattanException => FieldResponseCode.Error
      case _: InvalidRequestManhattanException => FieldResponseCode.InvalidRequest
      case _: DeniedManhattanException => FieldResponseCode.Error
      case _: UnsatisfiableManhattanException => FieldResponseCode.Error
      case _: TimeoutManhattanException => FieldResponseCode.Timeout
    }
  }
 }
 /**
 * Helper object to construct FieldResponse thrift object from an Exception.
 * This is typically called to convert 'ManhattanException' object to 'FieldResponse' thrift object
 */
 object FieldResponseCodec {
  class ValueNotFoundException extends ManhattanException("Value not found!") with NoStackTrace
  private[storage] val NotFound = new ValueNotFoundException
  def fromThrowable(e: Throwable, additionalMsg: Option[String] = None): FieldResponse = {
    val (respCode, errMsg) = e match {
      case mhException: ManhattanException =>
        (FieldResponseCodeCodec.fromManhattanException(mhException), mhException.getMessage)
      case _ => (FieldResponseCode.Error, e.getMessage)
    }
    val respMsg = additionalMsg.map(_ + ". " + errMsg).orElse(Some(errMsg.toString))
    FieldResponse(respCode, respMsg)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala
@ -0,0 +1,67 @@
 package com.twitter.tweetypie.storage
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.stitch.Stitch
 import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
 import com.twitter.tweetypie.storage.TweetUtils._
 import com.twitter.util.Throw
 import com.twitter.util.Time
 object DeleteAdditionalFieldsHandler {
  def apply(
    delete: ManhattanOperations.Delete,
    stats: StatsReceiver
  ): TweetStorageClient.DeleteAdditionalFields =
    (unfilteredTweetIds: Seq[TweetId], additionalFields: Seq[Field]) => {
      val tweetIds = unfilteredTweetIds.filter(_ > 0)
      val additionalFieldIds = additionalFields.map(_.id)
      require(additionalFields.nonEmpty, "Additional fields to delete cannot be empty")
      require(
        additionalFieldIds.min >= TweetFields.firstAdditionalFieldId,
        s"Additional fields $additionalFields must be in additional field range (>= ${TweetFields.firstAdditionalFieldId})"
      )
      Stats.addWidthStat("deleteAdditionalFields", "tweetIds", tweetIds.size, stats)
      Stats.addWidthStat(
        "deleteAdditionalFields",
        "additionalFieldIds",
        additionalFieldIds.size,
        stats
      )
      Stats.updatePerFieldQpsCounters(
        "deleteAdditionalFields",
        additionalFieldIds,
        tweetIds.size,
        stats
      )
      val mhTimestamp = Time.now
      val stitches = tweetIds.map { tweetId =>
        val (fieldIds, mhKeysToDelete) =
          additionalFieldIds.map { fieldId =>
            (fieldId, TweetKey.additionalFieldsKey(tweetId, fieldId))
          }.unzip
        val deletionStitches = mhKeysToDelete.map { mhKeyToDelete =>
          delete(mhKeyToDelete, Some(mhTimestamp)).liftToTry
        }
        Stitch.collect(deletionStitches).map { responsesTries =>
          val wasRateLimited = responsesTries.exists {
            case Throw(e: DeniedManhattanException) => true
            case _ => false
          }
          val resultsPerTweet = fieldIds.zip(responsesTries).toMap
          if (wasRateLimited) {
            buildTweetOverCapacityResponse("deleteAdditionalFields", tweetId, resultsPerTweet)
          } else {
            buildTweetResponse("deleteAdditionalFields", tweetId, resultsPerTweet)
          }
        }
      }
      Stitch.collect(stitches)
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala
@ -0,0 +1,41 @@
 package com.twitter.tweetypie.storage
 import com.twitter.tweetypie.additionalfields.AdditionalFields
 import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
 import com.twitter.tweetypie.thriftscala.{Tweet => TpTweet}
 /**
 * A field of the stored version of a tweet to read, update, or delete.
 *
 * There is not a one-to-one correspondence between the fields ids of
 * [[com.twitter.tweetypie.thriftscala.Tweet]] and
 * [[com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet]]. For example, in StoredTweet,
 * the nsfwUser property is field 11; in Tweet, it is a property of the coreData struct in field 2.
 * To circumvent the confusion of using one set of field ids or the other, callers use instances of
 * [[Field]] to reference the part of the object to modify.
 */
 class Field private[storage] (val id: Short) extends AnyVal {
  override def toString: String = id.toString
 }
 /**
 * NOTE: Make sure `AllUpdatableCompiledFields` is kept up to date when adding any new field
 */
 object Field {
  import AdditionalFields.isAdditionalFieldId
  val Geo: Field = new Field(StoredTweet.GeoField.id)
  val HasTakedown: Field = new Field(StoredTweet.HasTakedownField.id)
  val NsfwUser: Field = new Field(StoredTweet.NsfwUserField.id)
  val NsfwAdmin: Field = new Field(StoredTweet.NsfwAdminField.id)
  val TweetypieOnlyTakedownCountryCodes: Field =
    new Field(TpTweet.TweetypieOnlyTakedownCountryCodesField.id)
  val TweetypieOnlyTakedownReasons: Field =
    new Field(TpTweet.TweetypieOnlyTakedownReasonsField.id)
  val AllUpdatableCompiledFields: Set[Field] = Set(Geo, HasTakedown, NsfwUser, NsfwAdmin)
  def additionalField(id: Short): Field = {
    require(isAdditionalFieldId(id), "field id must be in the additional field range")
    new Field(id)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala
@ -0,0 +1,150 @@
 package com.twitter.tweetypie.storage
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.stitch.Stitch
 import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
 import com.twitter.tweetypie.storage.Response.TweetResponseCode
 import com.twitter.tweetypie.storage.TweetUtils._
 import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
 import com.twitter.tweetypie.thriftscala.DeletedTweet
 import scala.util.control.NonFatal
 sealed trait DeleteState
 object DeleteState {
  /**
   * This tweet is deleted but has not been permanently deleted from Manhattan. Tweets in this state
   * may be undeleted.
   */
  case object SoftDeleted extends DeleteState
  /**
   * This tweet is deleted after being bounced for violating the Twitter Rules but has not been
   * permanently deleted from Manhattan. Tweets in this state may NOT be undeleted.
   */
  case object BounceDeleted extends DeleteState
  /**
   * This tweet has been permanently deleted from Manhattan.
   */
  case object HardDeleted extends DeleteState
  /**
   * There is no data in Manhattan to distinguish this tweet id from one that never existed.
   */
  case object NotFound extends DeleteState
  /**
   * This tweet exists and is not in a deleted state.
   */
  case object NotDeleted extends DeleteState
 }
 case class DeletedTweetResponse(
  tweetId: TweetId,
  overallResponse: TweetResponseCode,
  deleteState: DeleteState,
  tweet: Option[DeletedTweet])
 object GetDeletedTweetsHandler {
  def apply(
    read: ManhattanOperations.Read,
    stats: StatsReceiver
  ): TweetStorageClient.GetDeletedTweets =
    (unfilteredTweetIds: Seq[TweetId]) => {
      val tweetIds = unfilteredTweetIds.filter(_ > 0)
      Stats.addWidthStat("getDeletedTweets", "tweetIds", tweetIds.size, stats)
      val stitches = tweetIds.map { tweetId =>
        read(tweetId)
          .map { mhRecords =>
            val storedTweet = buildStoredTweet(tweetId, mhRecords)
            TweetStateRecord.mostRecent(mhRecords) match {
              case Some(m: TweetStateRecord.SoftDeleted) => softDeleted(m, storedTweet)
              case Some(m: TweetStateRecord.BounceDeleted) => bounceDeleted(m, storedTweet)
              case Some(m: TweetStateRecord.HardDeleted) => hardDeleted(m, storedTweet)
              case _ if storedTweet.getFieldBlobs(expectedFields).isEmpty => notFound(tweetId)
              case _ => notDeleted(tweetId, storedTweet)
            }
          }
          .handle {
            case _: DeniedManhattanException =>
              DeletedTweetResponse(
                tweetId,
                TweetResponseCode.OverCapacity,
                DeleteState.NotFound,
                None
              )
            case NonFatal(ex) =>
              TweetUtils.log.warning(
                ex,
                s"Unhandled exception in GetDeletedTweetsHandler for tweetId: $tweetId"
              )
              DeletedTweetResponse(tweetId, TweetResponseCode.Failure, DeleteState.NotFound, None)
          }
      }
      Stitch.collect(stitches)
    }
  private def notFound(tweetId: TweetId) =
    DeletedTweetResponse(
      tweetId = tweetId,
      overallResponse = TweetResponseCode.Success,
      deleteState = DeleteState.NotFound,
      tweet = None
    )
  private def softDeleted(record: TweetStateRecord.SoftDeleted, storedTweet: StoredTweet) =
    DeletedTweetResponse(
      record.tweetId,
      TweetResponseCode.Success,
      DeleteState.SoftDeleted,
      Some(
        StorageConversions
          .toDeletedTweet(storedTweet)
          .copy(deletedAtMsec = Some(record.createdAt))
      )
    )
  private def bounceDeleted(record: TweetStateRecord.BounceDeleted, storedTweet: StoredTweet) =
    DeletedTweetResponse(
      record.tweetId,
      TweetResponseCode.Success,
      DeleteState.BounceDeleted,
      Some(
        StorageConversions
          .toDeletedTweet(storedTweet)
          .copy(deletedAtMsec = Some(record.createdAt))
      )
    )
  private def hardDeleted(record: TweetStateRecord.HardDeleted, storedTweet: StoredTweet) =
    DeletedTweetResponse(
      record.tweetId,
      TweetResponseCode.Success,
      DeleteState.HardDeleted,
      Some(
        StorageConversions
          .toDeletedTweet(storedTweet)
          .copy(
            hardDeletedAtMsec = Some(record.createdAt),
            deletedAtMsec = Some(record.deletedAt)
          )
      )
    )
  /**
   * notDeleted returns a tweet to simplify tweetypie.handler.UndeleteTweetHandler
   */
  private def notDeleted(tweetId: TweetId, storedTweet: StoredTweet) =
    DeletedTweetResponse(
      tweetId = tweetId,
      overallResponse = TweetResponseCode.Success,
      deleteState = DeleteState.NotDeleted,
      tweet = Some(StorageConversions.toDeletedTweet(storedTweet))
    )
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala
@ -0,0 +1,126 @@
 package com.twitter.tweetypie.storage
 import com.twitter.conversions.DurationOps._
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.stitch.Stitch
 import com.twitter.stitch.StitchSeqGroup
 import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet
 import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet.Error
 import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet.Response._
 import com.twitter.tweetypie.storage.TweetUtils._
 import com.twitter.tweetypie.thriftscala.Tweet
 import com.twitter.util.Time
 import com.twitter.util.Try
 import scala.collection.mutable
 object GetStoredTweetHandler {
  private[this] object DeletedState {
    def unapply(stateRecord: Option[TweetStateRecord]): Option[TweetStateRecord] =
      stateRecord match {
        case state @ (Some(_: TweetStateRecord.SoftDeleted) | Some(
              _: TweetStateRecord.HardDeleted) | Some(_: TweetStateRecord.BounceDeleted)) =>
          state
        case _ => None
      }
  }
  private[this] def deletedAtMs(stateRecord: Option[TweetStateRecord]): Option[Long] =
    stateRecord match {
      case Some(d: TweetStateRecord.SoftDeleted) => Some(d.createdAt)
      case Some(d: TweetStateRecord.BounceDeleted) => Some(d.createdAt)
      case Some(d: TweetStateRecord.HardDeleted) => Some(d.deletedAt)
      case _ => None
    }
  private[this] def tweetResponseFromRecords(
    tweetId: TweetId,
    mhRecords: Seq[TweetManhattanRecord],
    statsReceiver: StatsReceiver,
  ): GetStoredTweet.Response = {
    val errs =
      mutable.Buffer[Error]()
    val hasStoredTweetFields: Boolean = mhRecords.exists {
      case TweetManhattanRecord(TweetKey(_, _: TweetKey.LKey.FieldKey), _) => true
      case _ => false
    }
    val storedTweet = if (hasStoredTweetFields) {
      Try(buildStoredTweet(tweetId, mhRecords, includeScrubbed = true))
        .onFailure(_ => errs.append(Error.TweetIsCorrupt))
        .toOption
    } else {
      None
    }
    val scrubbedFields: Set[FieldId] = extractScrubbedFields(mhRecords)
    val tweet: Option[Tweet] = storedTweet.map(StorageConversions.fromStoredTweetAllowInvalid)
    val stateRecords: Seq[TweetStateRecord] = TweetStateRecord.fromTweetMhRecords(mhRecords)
    val tweetState: Option[TweetStateRecord] = TweetStateRecord.mostRecent(mhRecords)
    storedTweet.foreach { storedTweet =>
      val storedExpectedFields = storedTweet.getFieldBlobs(expectedFields)
      val missingExpectedFields = expectedFields.filterNot(storedExpectedFields.contains)
      if (missingExpectedFields.nonEmpty || !isValid(storedTweet)) {
        errs.append(Error.TweetFieldsMissingOrInvalid)
      }
      val invalidScrubbedFields = storedTweet.getFieldBlobs(scrubbedFields).keys
      if (invalidScrubbedFields.nonEmpty) {
        errs.append(Error.ScrubbedFieldsPresent)
      }
      if (deletedAtMs(tweetState).exists(_ < Time.now.inMilliseconds - 14.days.inMilliseconds)) {
        errs.append(Error.TweetShouldBeHardDeleted)
      }
    }
    val err = Option(errs.toList).filter(_.nonEmpty)
    (tweet, tweetState, err) match {
      case (None, None, None) =>
        statsReceiver.counter("not_found").incr()
        NotFound(tweetId)
      case (None, Some(tweetState: TweetStateRecord.HardDeleted), None) =>
        statsReceiver.counter("hard_deleted").incr()
        HardDeleted(tweetId, Some(tweetState), stateRecords, scrubbedFields)
      case (None, _, Some(errs)) =>
        statsReceiver.counter("failed").incr()
        Failed(tweetId, tweetState, stateRecords, scrubbedFields, errs)
      case (Some(tweet), _, Some(errs)) =>
        statsReceiver.counter("found_invalid").incr()
        FoundWithErrors(tweet, tweetState, stateRecords, scrubbedFields, errs)
      case (Some(tweet), DeletedState(state), None) =>
        statsReceiver.counter("deleted").incr()
        FoundDeleted(tweet, Some(state), stateRecords, scrubbedFields)
      case (Some(tweet), _, None) =>
        statsReceiver.counter("found").incr()
        Found(tweet, tweetState, stateRecords, scrubbedFields)
    }
  }
  def apply(read: ManhattanOperations.Read, statsReceiver: StatsReceiver): GetStoredTweet = {
    object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] {
      override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = {
        Stats.addWidthStat("getStoredTweet", "tweetIds", tweetIds.size, statsReceiver)
        Stitch.traverse(tweetIds)(read(_))
      }
    }
    tweetId =>
      if (tweetId <= 0) {
        Stitch.NotFound
      } else {
        Stitch
          .call(tweetId, mhGroup)
          .map(mhRecords =>
            tweetResponseFromRecords(tweetId, mhRecords, statsReceiver.scope("getStoredTweet")))
      }
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala
@ -0,0 +1,167 @@
 package com.twitter.tweetypie.storage
 import com.twitter.conversions.DurationOps._
 import com.twitter.finagle.stats.Counter
 import com.twitter.finagle.stats.NullStatsReceiver
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.logging.Logger
 import com.twitter.snowflake.id.SnowflakeId
 import com.twitter.stitch.Stitch
 import com.twitter.stitch.StitchSeqGroup
 import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
 import com.twitter.storage.client.manhattan.kv.ManhattanException
 import com.twitter.tweetypie.storage.TweetStateRecord.BounceDeleted
 import com.twitter.tweetypie.storage.TweetStateRecord.HardDeleted
 import com.twitter.tweetypie.storage.TweetStateRecord.SoftDeleted
 import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet
 import com.twitter.tweetypie.storage.TweetUtils._
 import com.twitter.util.Duration
 import com.twitter.util.Return
 import com.twitter.util.Throw
 import com.twitter.util.Time
 object GetTweetHandler {
  private[this] val logger = Logger(getClass)
  //////////////////////////////////////////////////
  // Logging racy reads for later validation.
  val RacyTweetWindow: Duration = 10.seconds
  /**
   * If this read is soon after the tweet was created, then we would usually
   * expect it to be served from cache. This early read indicates that this
   * tweet is prone to consistency issues, so we log what's present in
   * Manhattan at the time of the read for later analysis.
   */
  private[this] def logRacyRead(tweetId: TweetId, records: Seq[TweetManhattanRecord]): Unit =
    if (SnowflakeId.isSnowflakeId(tweetId)) {
      val tweetAge = Time.now.since(SnowflakeId(tweetId).time)
      if (tweetAge <= RacyTweetWindow) {
        val sb = new StringBuilder
        sb.append("racy_tweet_read\t")
          .append(tweetId)
          .append('\t')
          .append(tweetAge.inMilliseconds) // Log the age for analysis purposes
        records.foreach { rec =>
          sb.append('\t')
            .append(rec.lkey)
          rec.value.timestamp.foreach { ts =>
            // If there is a timestamp for this key, log it so that we can tell
            // later on whether a value should have been present. We expect
            // keys written in a single write to have the same timestamp, and
            // generally, keys written in separate writes will have different
            // timestamps. The timestamp value is optional in Manhattan, but
            // we expect there to always be a value for the timestamp.
            sb.append(':')
              .append(ts.inMilliseconds)
          }
        }
        logger.info(sb.toString)
      }
    }
  /**
   * Convert a set of records from Manhattan into a GetTweet.Response.
   */
  def tweetResponseFromRecords(
    tweetId: TweetId,
    mhRecords: Seq[TweetManhattanRecord],
    statsReceiver: StatsReceiver = NullStatsReceiver
  ): GetTweet.Response =
    if (mhRecords.isEmpty) {
      GetTweet.Response.NotFound
    } else {
      // If no internal fields are present or no required fields present, we consider the tweet
      // as not returnable (even if some additional fields are present)
      def tweetFromRecords(tweetId: TweetId, mhRecords: Seq[TweetManhattanRecord]) = {
        val storedTweet = buildStoredTweet(tweetId, mhRecords)
        if (storedTweet.getFieldBlobs(expectedFields).nonEmpty) {
          if (isValid(storedTweet)) {
            statsReceiver.counter("valid").incr()
            Some(StorageConversions.fromStoredTweet(storedTweet))
          } else {
            log.info(s"Invalid Tweet Id: $tweetId")
            statsReceiver.counter("invalid").incr()
            None
          }
        } else {
          // The Tweet contained none of the fields defined in `expectedFields`
          log.info(s"Expected Fields Not Present Tweet Id: $tweetId")
          statsReceiver.counter("expected_fields_not_present").incr()
          None
        }
      }
      val stateRecord = TweetStateRecord.mostRecent(mhRecords)
      stateRecord match {
        // some  other cases don't require an attempt to construct a Tweet
        case Some(_: SoftDeleted) | Some(_: HardDeleted) => GetTweet.Response.Deleted
        // all other cases require an attempt to construct a Tweet, which may not be successful
        case _ =>
          logRacyRead(tweetId, mhRecords)
          (stateRecord, tweetFromRecords(tweetId, mhRecords)) match {
            // BounceDeleted contains the Tweet data so that callers can access data on the the
            // tweet (e.g. hard delete daemon requires conversationId and userId. There are no
            // plans for Tweetypie server to make use of the returned tweet at this time.
            case (Some(_: BounceDeleted), Some(tweet)) => GetTweet.Response.BounceDeleted(tweet)
            case (Some(_: BounceDeleted), None) => GetTweet.Response.Deleted
            case (_, Some(tweet)) => GetTweet.Response.Found(tweet)
            case _ => GetTweet.Response.NotFound
          }
      }
    }
  def apply(read: ManhattanOperations.Read, statsReceiver: StatsReceiver): GetTweet = {
    object stats {
      val getTweetScope = statsReceiver.scope("getTweet")
      val deniedCounter: Counter = getTweetScope.counter("mh_denied")
      val mhExceptionCounter: Counter = getTweetScope.counter("mh_exception")
      val nonFatalExceptionCounter: Counter = getTweetScope.counter("non_fatal_exception")
      val notFoundCounter: Counter = getTweetScope.counter("not_found")
    }
    object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] {
      override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = {
        Stats.addWidthStat("getTweet", "tweetIds", tweetIds.size, statsReceiver)
        Stitch.traverse(tweetIds)(read(_))
      }
    }
    tweetId =>
      if (tweetId <= 0) {
        Stitch.NotFound
      } else {
        Stitch
          .call(tweetId, mhGroup)
          .map(mhRecords => tweetResponseFromRecords(tweetId, mhRecords, stats.getTweetScope))
          .liftToTry
          .map {
            case Throw(mhException: DeniedManhattanException) =>
              stats.deniedCounter.incr()
              Throw(RateLimited("", mhException))
            // Encountered some other Manhattan error
            case t @ Throw(_: ManhattanException) =>
              stats.mhExceptionCounter.incr()
              t
            // Something else happened
            case t @ Throw(ex) =>
              stats.nonFatalExceptionCounter.incr()
              TweetUtils.log
                .warning(ex, s"Unhandled exception in GetTweetHandler for tweetId: $tweetId")
              t
            case r @ Return(GetTweet.Response.NotFound) =>
              stats.notFoundCounter.incr()
              r
            case r @ Return(_) => r
          }
          .lowerFromTry
      }
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala
@ -0,0 +1,153 @@
 package com.twitter.tweetypie.storage
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.stitch.Stitch
 import com.twitter.tweetypie.storage.TweetKey.LKey.ForceAddedStateKey
 import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet
 import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet.Response._
 import com.twitter.tweetypie.storage.TweetUtils._
 import com.twitter.util.Return
 import com.twitter.util.Throw
 import com.twitter.util.Time
 import com.twitter.util.Try
 object HardDeleteTweetHandler {
  /**
   * When a tweet is removed lkeys with these prefixes will be deleted permanently.
   */
  private[storage] def isKeyToBeDeleted(key: TweetKey): Boolean =
    key.lKey match {
      case (TweetKey.LKey.CoreFieldsKey | TweetKey.LKey.InternalFieldsKey(_) |
          TweetKey.LKey.AdditionalFieldsKey(_) | TweetKey.LKey.SoftDeletionStateKey |
          TweetKey.LKey.BounceDeletionStateKey | TweetKey.LKey.UnDeletionStateKey |
          TweetKey.LKey.ForceAddedStateKey) =>
        true
      case _ => false
    }
  /**
   * When hard deleting, there are two actions, writing the record and
   * removing the tweet data. If we are performing any action, we will
   * always try to remove the tweet data. If the tweet does not yet have a
   * hard deletion record, then we will need to write one. This method
   * returns the HardDeleted record if it needs to be written, and None
   * if it has already been written.
   *
   * If the tweet is not in a deleted state we signal this with a
   * Throw(NotDeleted).
   */
  private[storage] def getHardDeleteStateRecord(
    tweetId: TweetId,
    records: Seq[TweetManhattanRecord],
    mhTimestamp: Time,
    stats: StatsReceiver
  ): Try[Option[TweetStateRecord.HardDeleted]] = {
    val mostRecent = TweetStateRecord.mostRecent(records)
    val currentStateStr = mostRecent.map(_.name).getOrElse("no_tweet_state_record")
    stats.counter(currentStateStr).incr()
    mostRecent match {
      case Some(
            record @ (TweetStateRecord.SoftDeleted(_, _) | TweetStateRecord.BounceDeleted(_, _))) =>
        Return(
          Some(
            TweetStateRecord.HardDeleted(
              tweetId = tweetId,
              // createdAt is the hard deletion timestamp when dealing with hard deletes in Manhattan
              createdAt = mhTimestamp.inMillis,
              // deletedAt is the soft deletion timestamp when dealing with hard deletes in Manhattan
              deletedAt = record.createdAt
            )
          )
        )
      case Some(_: TweetStateRecord.HardDeleted) =>
        Return(None)
      case Some(_: TweetStateRecord.ForceAdded) =>
        Throw(NotDeleted(tweetId, Some(ForceAddedStateKey)))
      case Some(_: TweetStateRecord.Undeleted) =>
        Throw(NotDeleted(tweetId, Some(TweetKey.LKey.UnDeletionStateKey)))
      case None =>
        Throw(NotDeleted(tweetId, None))
    }
  }
  /**
   * This handler returns HardDeleteTweet.Response.Deleted if data associated with the tweet is deleted,
   * either as a result of this request or a previous one.
   *
   * The most recently added record determines the tweet's state. This method will only delete data
   * for tweets in the soft-delete or hard-delete state. (Calling hardDeleteTweet for tweets that have
   * already been hard-deleted will remove any lkeys that may not have been deleted previously).
   */
  def apply(
    read: ManhattanOperations.Read,
    insert: ManhattanOperations.Insert,
    delete: ManhattanOperations.Delete,
    scribe: Scribe,
    stats: StatsReceiver
  ): TweetId => Stitch[HardDeleteTweet.Response] = {
    val hardDeleteStats = stats.scope("hardDeleteTweet")
    val hardDeleteTweetCancelled = hardDeleteStats.counter("cancelled")
    val beforeStateStats = hardDeleteStats.scope("before_state")
    def removeRecords(keys: Seq[TweetKey], mhTimestamp: Time): Stitch[Unit] =
      Stitch
        .collect(keys.map(key => delete(key, Some(mhTimestamp)).liftToTry))
        .map(collectWithRateLimitCheck)
        .lowerFromTry
    def writeRecord(record: Option[TweetStateRecord.HardDeleted]): Stitch[Unit] =
      record match {
        case Some(r) =>
          insert(r.toTweetMhRecord).onSuccess { _ =>
            scribe.logRemoved(
              r.tweetId,
              Time.fromMilliseconds(r.createdAt),
              isSoftDeleted = false
            )
          }
        case None => Stitch.Unit
      }
    tweetId =>
      read(tweetId)
        .flatMap { records =>
          val hardDeletionTimestamp = Time.now
          val keysToBeDeleted: Seq[TweetKey] = records.map(_.key).filter(isKeyToBeDeleted)
          getHardDeleteStateRecord(
            tweetId,
            records,
            hardDeletionTimestamp,
            beforeStateStats) match {
            case Return(record) =>
              Stitch
                .join(
                  writeRecord(record),
                  removeRecords(keysToBeDeleted, hardDeletionTimestamp)
                ).map(_ =>
                  // If the tweetId is non-snowflake and has previously been hard deleted
                  // there will be no coreData record to fall back on to get the tweet
                  // creation time and createdAtMillis will be None.
                  Deleted(
                    // deletedAtMillis: when the tweet was hard deleted
                    deletedAtMillis = Some(hardDeletionTimestamp.inMillis),
                    // createdAtMillis: when the tweet itself was created
                    // (as opposed to when the deletion record was created)
                    createdAtMillis =
                      TweetUtils.creationTimeFromTweetIdOrMHRecords(tweetId, records)
                  ))
            case Throw(notDeleted: NotDeleted) =>
              hardDeleteTweetCancelled.incr()
              Stitch.value(notDeleted)
            case Throw(e) => Stitch.exception(e) // this should never happen
          }
        }
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala
@ -0,0 +1,228 @@
 package com.twitter.tweetypie.storage
 import com.google.common.base.CaseFormat
 import com.twitter.conversions.DurationOps._
 import com.twitter.finagle.mtls.authentication.ServiceIdentifier
 import com.twitter.scrooge.TFieldBlob
 import com.twitter.scrooge.ThriftStructFieldInfo
 import com.twitter.stitch.Stitch
 import com.twitter.storage.client.manhattan.kv._
 import com.twitter.tweetypie.additionalfields.AdditionalFields
 import com.twitter.tweetypie.storage.ManhattanOperations.Read
 import com.twitter.tweetypie.storage.TweetUtils._
 import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
 import com.twitter.tweetypie.thriftscala.{Tweet => TweetypieTweet}
 import com.twitter.util.Duration
 import com.twitter.util.Future
 import com.twitter.util.Return
 import com.twitter.util.Throw
 import diffshow.Container
 import diffshow.DiffShow
 import diffshow.Expr
 import org.apache.commons.codec.binary.Base64
 import scala.util.Try
 import shapeless.Cached
 import shapeless.Strict
 // This class is used by the Tweetypie Console to inspect tweet field content in Manhattan
 class InspectFields(svcIdentifier: ServiceIdentifier) {
  val mhApplicationId = "tbird_mh"
  val mhDatasetName = "tbird_mh"
  val mhDestinationName = "/s/manhattan/cylon.native-thrift"
  val mhTimeout: Duration = 5000.milliseconds
  val localMhEndpoint: ManhattanKVEndpoint =
    ManhattanKVEndpointBuilder(
      ManhattanKVClient(
        mhApplicationId,
        mhDestinationName,
        ManhattanKVClientMtlsParams(svcIdentifier)))
      .defaultGuarantee(Guarantee.SoftDcReadMyWrites)
      .defaultMaxTimeout(mhTimeout)
      .build()
  val readOperation: Read = (new ManhattanOperations(mhDatasetName, localMhEndpoint)).read
  def lookup(tweetId: Long): Future[String] = {
    val result = readOperation(tweetId).liftToTry.map {
      case Return(mhRecords) =>
        prettyPrintManhattanRecords(tweetId, TweetKey.padTweetIdStr(tweetId), mhRecords)
      case Throw(e) => e.toString
    }
    Stitch.run(result)
  }
  def storedTweet(tweetId: Long): Future[StoredTweet] = {
    val result = readOperation(tweetId).liftToTry.map {
      case Return(mhRecords) =>
        buildStoredTweet(tweetId, mhRecords)
      case Throw(e) =>
        throw e
    }
    Stitch.run(result)
  }
  private[this] def prettyPrintManhattanRecords(
    tweetId: Long,
    pkey: String,
    mhRecords: Seq[TweetManhattanRecord]
  ): String = {
    if (mhRecords.isEmpty) {
      "Not Found"
    } else {
      val formattedRecords = getFormattedManhattanRecords(tweetId, mhRecords)
      val keyFieldWidth = formattedRecords.map(_.key.length).max + 2
      val fieldNameFieldWidth = formattedRecords.map(_.fieldName.length).max + 2
      val formatString = s"    %-${keyFieldWidth}s %-${fieldNameFieldWidth}s %s"
      val recordsString =
        formattedRecords
          .map { record =>
            val content = record.content.replaceAll("\n", "\n" + formatString.format("", "", ""))
            formatString.format(record.key, record.fieldName, content)
          }
          .mkString("\n")
      "/tbird_mh/" + pkey + "/" + "\n" + recordsString
    }
  }
  private[this] def getFormattedManhattanRecords(
    tweetId: Long,
    mhRecords: Seq[TweetManhattanRecord]
  ): Seq[FormattedManhattanRecord] = {
    val storedTweet = buildStoredTweet(tweetId, mhRecords).copy(updatedAt = None)
    val tweetypieTweet: Option[TweetypieTweet] =
      Try(StorageConversions.fromStoredTweet(storedTweet)).toOption
    val blobMap: Map[String, TFieldBlob] = getStoredTweetBlobs(mhRecords).map { blob =>
      getFieldName(blob.field.id) -> blob
    }.toMap
    mhRecords
      .map {
        case TweetManhattanRecord(fullKey, mhValue) =>
          FormattedManhattanRecord(
            key = fullKey.lKey.toString,
            fieldName = getFieldName(fullKey.lKey),
            content = prettyPrintManhattanValue(
              fullKey.lKey,
              mhValue,
              storedTweet,
              tweetypieTweet,
              tweetId,
              blobMap
            )
          )
      }
      .sortBy(_.key.replace("external", "xternal")) // sort by key, with internal first
  }
  private[this] def getFieldNameFromThrift(
    fieldId: Short,
    fieldInfos: List[ThriftStructFieldInfo]
  ): String =
    fieldInfos
      .find(info => info.tfield.id == fieldId)
      .map(_.tfield.name)
      .getOrElse("<UNKNOWN FIELD>")
  private[this] def isLkeyScrubbedField(lkey: String): Boolean =
    lkey.split("/")(1) == "scrubbed_fields"
  private[this] def getFieldName(lkey: TweetKey.LKey): String =
    lkey match {
      case fieldKey: TweetKey.LKey.FieldKey => getFieldName(fieldKey.fieldId)
      case _ => ""
    }
  private[this] def getFieldName(fieldId: Short): String =
    if (fieldId == 1) {
      "core_fields"
    } else if (AdditionalFields.isAdditionalFieldId(fieldId)) {
      getFieldNameFromThrift(fieldId, TweetypieTweet.fieldInfos)
    } else {
      getFieldNameFromThrift(fieldId, StoredTweet.fieldInfos)
    }
  private[this] def prettyPrintManhattanValue(
    lkey: TweetKey.LKey,
    mhValue: TweetManhattanValue,
    storedTweet: StoredTweet,
    tweetypieTweet: Option[TweetypieTweet],
    tweetId: Long,
    tfieldBlobs: Map[String, TFieldBlob]
  ): String = {
    val decoded = lkey match {
      case _: TweetKey.LKey.MetadataKey =>
        decodeMetadata(mhValue)
      case fieldKey: TweetKey.LKey.FieldKey =>
        tfieldBlobs
          .get(getFieldName(fieldKey.fieldId))
          .map(blob => decodeField(tweetId, blob, storedTweet, tweetypieTweet))
      case _ =>
        None
    }
    decoded.getOrElse { // If all else fails, encode the data as a base64 string
      val contents = mhValue.contents.array
      if (contents.isEmpty) {
        "<NO DATA>"
      } else {
        Base64.encodeBase64String(contents)
      }
    }
  }
  private[this] def decodeMetadata(mhValue: TweetManhattanValue): Option[String] = {
    val byteArray = ByteArrayCodec.fromByteBuffer(mhValue.contents)
    Try(Json.decode(byteArray).toString).toOption
  }
  private[this] def decodeField(
    tweetId: Long,
    blob: TFieldBlob,
    storedTweet: StoredTweet,
    tweetypieTweet: Option[TweetypieTweet]
  ): String = {
    val fieldId = blob.field.id
    if (fieldId == 1) {
      coreFields(storedTweet)
    } else if (AdditionalFields.isAdditionalFieldId(fieldId)) {
      decodeTweetWithOneField(TweetypieTweet(tweetId).setField(blob))
    } else {
      decodeTweetWithOneField(StoredTweet(tweetId).setField(blob))
    }
  }
  // Takes a Tweet or StoredTweet with a single field set and returns the value of that field
  private[this] def decodeTweetWithOneField[T](
    tweetWithOneField: T
  )(
    implicit ev: Cached[Strict[DiffShow[T]]]
  ): String = {
    val config = diffshow.Config(hideFieldWithEmptyVal = true)
    val tree: Expr = config.transform(DiffShow.show(tweetWithOneField))
    // matches a Tweet or StoredTweet with two values, the first being the id
    val value = tree.transform {
      case Container(_, List(diffshow.Field("id", _), diffshow.Field(_, value))) => value
    }
    config.exprPrinter.apply(value, width = 80).render
  }
  private[this] def coreFields(storedTweet: StoredTweet): String =
    diffshow.show(CoreFieldsCodec.fromTweet(storedTweet), hideFieldWithEmptyVal = true)
  private[this] def toCamelCase(s: String): String =
    CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, s)
 }
 case class FormattedManhattanRecord(key: String, fieldName: String, content: String)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala
@ -0,0 +1,17 @@
 package com.twitter.tweetypie.storage
 import com.fasterxml.jackson.databind.ObjectMapper
 import com.fasterxml.jackson.module.scala.DefaultScalaModule
 object Json {
  val TimestampKey = "timestamp"
  val SoftDeleteTimestampKey = "softdelete_timestamp"
  private val mapper = new ObjectMapper
  mapper.registerModule(DefaultScalaModule)
  def encode(m: Map[String, Any]): Array[Byte] = mapper.writeValueAsBytes(m)
  def decode(arr: Array[Byte]): Map[String, Any] =
    mapper.readValue[Map[String, Any]](arr, classOf[Map[String, Any]])
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala
@ -0,0 +1,103 @@
 package com.twitter.tweetypie.storage
 import com.twitter.bijection.Injection
 import com.twitter.io.Buf
 import com.twitter.stitch.Stitch
 import com.twitter.storage.client.manhattan.bijections.Bijections.BufInjection
 import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpoint
 import com.twitter.storage.client.manhattan.kv.impl.DescriptorP1L1
 import com.twitter.storage.client.manhattan.kv.impl.Component
 import com.twitter.storage.client.manhattan.kv.{impl => mh}
 import com.twitter.storage.client.manhattan.bijections.Bijections.StringInjection
 import com.twitter.util.Time
 import java.nio.ByteBuffer
 import scala.util.control.NonFatal
 case class TweetManhattanRecord(key: TweetKey, value: TweetManhattanValue) {
  def pkey: TweetId = key.tweetId
  def lkey: TweetKey.LKey = key.lKey
  /**
   * Produces a representation that is human-readable, but contains
   * all of the information from the record. It is not intended for
   * producing machine-readable values.
   *
   * This conversion is relatively expensive, so beware of using it in
   * hot code paths.
   */
  override def toString: String = {
    val valueString =
      try {
        key.lKey match {
          case _: TweetKey.LKey.MetadataKey =>
            StringCodec.fromByteBuffer(value.contents)
          case _: TweetKey.LKey.FieldKey =>
            val tFieldBlob = TFieldBlobCodec.fromByteBuffer(value.contents)
            s"TFieldBlob(${tFieldBlob.field}, 0x${Buf.slowHexString(tFieldBlob.content)})"
          case TweetKey.LKey.Unknown(_) =>
            "0x" + Buf.slowHexString(Buf.ByteBuffer.Shared(value.contents))
        }
      } catch {
        case NonFatal(e) =>
          val hexValue = Buf.slowHexString(Buf.ByteBuffer.Shared(value.contents))
          s"0x$hexValue (failed to decode due to $e)"
      }
    s"$key => ${value.copy(contents = valueString)}"
  }
 }
 object ManhattanOperations {
  type Read = TweetId => Stitch[Seq[TweetManhattanRecord]]
  type Insert = TweetManhattanRecord => Stitch[Unit]
  type Delete = (TweetKey, Option[Time]) => Stitch[Unit]
  type DeleteRange = TweetId => Stitch[Unit]
  object PkeyInjection extends Injection[TweetId, String] {
    override def apply(tweetId: TweetId): String = TweetKey.padTweetIdStr(tweetId)
    override def invert(str: String): scala.util.Try[TweetId] = scala.util.Try(str.toLong)
  }
  case class InvalidLkey(lkeyStr: String) extends Exception
  object LkeyInjection extends Injection[TweetKey.LKey, String] {
    override def apply(lkey: TweetKey.LKey): String = lkey.toString
    override def invert(str: String): scala.util.Try[TweetKey.LKey] =
      scala.util.Success(TweetKey.LKey.fromString(str))
  }
  val KeyDescriptor: DescriptorP1L1.EmptyKey[TweetId, TweetKey.LKey] =
    mh.KeyDescriptor(
      Component(PkeyInjection.andThen(StringInjection)),
      Component(LkeyInjection.andThen(StringInjection))
    )
  val ValueDescriptor: mh.ValueDescriptor.EmptyValue[ByteBuffer] = mh.ValueDescriptor(BufInjection)
 }
 class ManhattanOperations(dataset: String, mhEndpoint: ManhattanKVEndpoint) {
  import ManhattanOperations._
  private[this] def pkey(tweetId: TweetId) = KeyDescriptor.withDataset(dataset).withPkey(tweetId)
  def read: Read = { tweetId =>
    mhEndpoint.slice(pkey(tweetId).under(), ValueDescriptor).map { mhData =>
      mhData.map {
        case (key, value) => TweetManhattanRecord(TweetKey(key.pkey, key.lkey), value)
      }
    }
  }
  def insert: Insert =
    record => {
      val mhKey = pkey(record.key.tweetId).withLkey(record.key.lKey)
      mhEndpoint.insert(mhKey, ValueDescriptor.withValue(record.value))
    }
  def delete: Delete = (key, time) => mhEndpoint.delete(pkey(key.tweetId).withLkey(key.lKey), time)
  def deleteRange: DeleteRange =
    tweetId => mhEndpoint.deleteRange(KeyDescriptor.withDataset(dataset).withPkey(tweetId).under())
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala
@ -0,0 +1,451 @@
 package com.twitter.tweetypie.storage
 import com.twitter.conversions.DurationOps._
 import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier
 import com.twitter.finagle.mtls.authentication.ServiceIdentifier
 import com.twitter.finagle.ssl.OpportunisticTls
 import com.twitter.finagle.stats.NullStatsReceiver
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.logging.BareFormatter
 import com.twitter.logging.Level
 import com.twitter.logging.ScribeHandler
 import com.twitter.logging._
 import com.twitter.stitch.Stitch
 import com.twitter.storage.client.manhattan.bijections.Bijections._
 import com.twitter.storage.client.manhattan.kv._
 import com.twitter.storage.client.manhattan.kv.impl.ValueDescriptor
 import com.twitter.tweetypie.client_id.ClientIdHelper
 import com.twitter.tweetypie.storage.Scribe.ScribeHandlerFactory
 import com.twitter.tweetypie.storage.TweetStorageClient.BounceDelete
 import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet
 import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet
 import com.twitter.tweetypie.thriftscala.Tweet
 import com.twitter.tweetypie.util.StitchUtils
 import com.twitter.util.Duration
 import com.twitter.util.Return
 import com.twitter.util.Throw
 import scala.util.Random
 object ManhattanTweetStorageClient {
  object Config {
    /**
     * The Manhattan dataset where tweets are stored is not externally
     * configurable because writing tweets to a non-production dataset
     * requires great care. Staging instances using a different dataset will
     * write tweets to a non-production store, but will publish events, log to
     * HDFS, and cache data referencing tweets in that store which are not
     * accessible by the rest of the production cluster.
     *
     * In a completely isolated environment it should be safe to write to
     * other datasets for testing purposes.
     */
    val Dataset = "tbird_mh"
    /**
     * Once a tweet has been deleted it can only be undeleted within this time
     * window, after which [[UndeleteHandler]] will return an error on
     * undelete attempts.
     */
    val UndeleteWindowHours = 240
    /**
     * Default label used for underlying Manhattan Thrift client metrics
     *
     * The finagle client metrics will be exported at clnt/:label.
     */
    val ThriftClientLabel = "mh_cylon"
    /**
     * Return the corresponding Wily path for the Cylon cluster in the "other" DC
     */
    def remoteDestination(zone: String): String =
      s"/srv#/prod/${remoteZone(zone)}/manhattan/cylon.native-thrift"
    private def remoteZone(zone: String) = zone match {
      case "pdxa" => "atla"
      case "atla" | "localhost" => "pdxa"
      case _ =>
        throw new IllegalArgumentException(s"Cannot configure remote DC for unknown zone '$zone'")
    }
  }
  /**
   * @param applicationId Manhattan application id used for quota accounting
   * @param localDestination Wily path to local Manhattan cluster
   * @param localTimeout Overall timeout (including retries) for all reads/writes to local cluster
   * @param remoteDestination Wily path to remote Manhattan cluster, used for undelete and force add
   * @param remoteTimeout Overall timeout (including retries) for all reads/writes to remote cluster
   * @param undeleteWindowHours Amount of time during which a deleted tweet can be undeleted
   * @param thriftClientLabel Label used to scope stats for Manhattan Thrift client
   * @param maxRequestsPerBatch Configure the Stitch RequestGroup.Generator batch size
   * @param serviceIdentifier The ServiceIdentifier to use when making connections to a Manhattan cluster
   * @param opportunisticTlsLevel The level to use for opportunistic TLS for connections to the Manhattan cluster
   */
  case class Config(
    applicationId: String,
    localDestination: String,
    localTimeout: Duration,
    remoteDestination: String,
    remoteTimeout: Duration,
    undeleteWindowHours: Int = Config.UndeleteWindowHours,
    thriftClientLabel: String = Config.ThriftClientLabel,
    maxRequestsPerBatch: Int = Int.MaxValue,
    serviceIdentifier: ServiceIdentifier,
    opportunisticTlsLevel: OpportunisticTls.Level)
  /**
   * Sanitizes the input for APIs which take in a (Tweet, Seq[Field]) as input.
   *
   * NOTE: This function only applies sanity checks which are common to
   * all APIs which take in a (Tweet, Seq[Field]) as input. API specific
   * checks are not covered here.
   *
   * @param apiStitch the backing API call
   * @tparam T the output type of the backing API call
   * @return a stitch function which does some basic input sanity checking
   */
  private[storage] def sanitizeTweetFields[T](
    apiStitch: (Tweet, Seq[Field]) => Stitch[T]
  ): (Tweet, Seq[Field]) => Stitch[T] =
    (tweet, fields) => {
      require(fields.forall(_.id > 0), s"Field ids ${fields} are not positive numbers")
      apiStitch(tweet, fields)
    }
  // Returns a handler that asynchronously logs messages to Scribe using the BareFormatter which
  // logs just the message without any additional metadata
  def scribeHandler(categoryName: String): HandlerFactory =
    ScribeHandler(
      formatter = BareFormatter,
      maxMessagesPerTransaction = 100,
      category = categoryName,
      level = Some(Level.TRACE)
    )
  /**
   * A Config appropriate for interactive sessions and scripts.
   */
  def develConfig(): Config =
    Config(
      applicationId = Option(System.getenv("USER")).getOrElse("<unknown>") + ".devel",
      localDestination = "/s/manhattan/cylon.native-thrift",
      localTimeout = 10.seconds,
      remoteDestination = "/s/manhattan/cylon.native-thrift",
      remoteTimeout = 10.seconds,
      undeleteWindowHours = Config.UndeleteWindowHours,
      thriftClientLabel = Config.ThriftClientLabel,
      maxRequestsPerBatch = Int.MaxValue,
      serviceIdentifier = ServiceIdentifier(System.getenv("USER"), "tweetypie", "devel", "local"),
      opportunisticTlsLevel = OpportunisticTls.Required
    )
  /**
   * Build a Manhattan tweet storage client for use in interactive
   * sessions and scripts.
   */
  def devel(): TweetStorageClient =
    new ManhattanTweetStorageClient(
      develConfig(),
      NullStatsReceiver,
      ClientIdHelper.default,
    )
 }
 class ManhattanTweetStorageClient(
  config: ManhattanTweetStorageClient.Config,
  statsReceiver: StatsReceiver,
  private val clientIdHelper: ClientIdHelper)
    extends TweetStorageClient {
  import ManhattanTweetStorageClient._
  lazy val scribeHandlerFactory: ScribeHandlerFactory = scribeHandler _
  val scribe: Scribe = new Scribe(scribeHandlerFactory, statsReceiver)
  def mkClient(
    dest: String,
    label: String
  ): ManhattanKVClient = {
    val mhMtlsParams =
      if (config.serviceIdentifier == EmptyServiceIdentifier) NoMtlsParams
      else
        ManhattanKVClientMtlsParams(
          serviceIdentifier = config.serviceIdentifier,
          opportunisticTls = config.opportunisticTlsLevel
        )
    new ManhattanKVClient(
      config.applicationId,
      dest,
      mhMtlsParams,
      label,
      Seq(Experiments.ApertureLoadBalancer))
  }
  val localClient: ManhattanKVClient = mkClient(config.localDestination, config.thriftClientLabel)
  val localMhEndpoint: ManhattanKVEndpoint = ManhattanKVEndpointBuilder(localClient)
    .defaultGuarantee(Guarantee.SoftDcReadMyWrites)
    .defaultMaxTimeout(config.localTimeout)
    .maxRequestsPerBatch(config.maxRequestsPerBatch)
    .build()
  val localManhattanOperations = new ManhattanOperations(Config.Dataset, localMhEndpoint)
  val remoteClient: ManhattanKVClient =
    mkClient(config.remoteDestination, s"${config.thriftClientLabel}_remote")
  val remoteMhEndpoint: ManhattanKVEndpoint = ManhattanKVEndpointBuilder(remoteClient)
    .defaultGuarantee(Guarantee.SoftDcReadMyWrites)
    .defaultMaxTimeout(config.remoteTimeout)
    .build()
  val remoteManhattanOperations = new ManhattanOperations(Config.Dataset, remoteMhEndpoint)
  /**
   * Note: This translation is only useful for non-batch endpoints. Batch endpoints currently
   * represent failure without propagating an exception
   * (e.g. [[com.twitter.tweetypie.storage.Response.TweetResponseCode.Failure]]).
   */
  private[this] def translateExceptions(
    apiName: String,
    statsReceiver: StatsReceiver
  ): PartialFunction[Throwable, Throwable] = {
    case e: IllegalArgumentException => ClientError(e.getMessage, e)
    case e: DeniedManhattanException => RateLimited(e.getMessage, e)
    case e: VersionMismatchError =>
      statsReceiver.scope(apiName).counter("mh_version_mismatches").incr()
      e
    case e: InternalError =>
      TweetUtils.log.error(e, s"Error processing $apiName request: ${e.getMessage}")
      e
  }
  /**
   * Count requests per client id producing metrics of the form
   * .../clients/:root_client_id/requests
   */
  def observeClientId[A, B](
    apiStitch: A => Stitch[B],
    statsReceiver: StatsReceiver,
    clientIdHelper: ClientIdHelper,
  ): A => Stitch[B] = {
    val clients = statsReceiver.scope("clients")
    val incrementClientRequests = { args: A =>
      val clientId = clientIdHelper.effectiveClientIdRoot.getOrElse(ClientIdHelper.UnknownClientId)
      clients.counter(clientId, "requests").incr
    }
    a => {
      incrementClientRequests(a)
      apiStitch(a)
    }
  }
  /**
   * Increment counters based on the overall response status of the returned [[GetTweet.Response]].
   */
  def observeGetTweetResponseCode[A](
    apiStitch: A => Stitch[GetTweet.Response],
    statsReceiver: StatsReceiver
  ): A => Stitch[GetTweet.Response] = {
    val scope = statsReceiver.scope("response_code")
    val success = scope.counter("success")
    val notFound = scope.counter("not_found")
    val failure = scope.counter("failure")
    val overCapacity = scope.counter("over_capacity")
    val deleted = scope.counter("deleted")
    val bounceDeleted = scope.counter("bounce_deleted")
    a =>
      apiStitch(a).respond {
        case Return(_: GetTweet.Response.Found) => success.incr()
        case Return(GetTweet.Response.NotFound) => notFound.incr()
        case Return(_: GetTweet.Response.BounceDeleted) => bounceDeleted.incr()
        case Return(GetTweet.Response.Deleted) => deleted.incr()
        case Throw(_: RateLimited) => overCapacity.incr()
        case Throw(_) => failure.incr()
      }
  }
  /**
   * We do 3 things here:
   *
   * - Bookkeeping for overall requests
   * - Bookkeeping for per api requests
   * - Translate exceptions
   *
   * @param apiName the API being called
   * @param apiStitch the implementation of the API
   * @tparam A template for input type of API
   * @tparam B template for output type of API
   * @return Function which executes the given API call
   */
  private[storage] def endpoint[A, B](
    apiName: String,
    apiStitch: A => Stitch[B]
  ): A => Stitch[B] = {
    val translateException = translateExceptions(apiName, statsReceiver)
    val observe = StitchUtils.observe[B](statsReceiver, apiName)
    a =>
      StitchUtils.translateExceptions(
        observe(apiStitch(a)),
        translateException
      )
  }
  private[storage] def endpoint2[A, B, C](
    apiName: String,
    apiStitch: (A, B) => Stitch[C],
    clientIdHelper: ClientIdHelper,
  ): (A, B) => Stitch[C] =
    Function.untupled(endpoint(apiName, apiStitch.tupled))
  val getTweet: TweetStorageClient.GetTweet = {
    val stats = statsReceiver.scope("getTweet")
    observeClientId(
      observeGetTweetResponseCode(
        endpoint(
          "getTweet",
          GetTweetHandler(
            read = localManhattanOperations.read,
            statsReceiver = stats,
          )
        ),
        stats,
      ),
      stats,
      clientIdHelper,
    )
  }
  val getStoredTweet: TweetStorageClient.GetStoredTweet = {
    val stats = statsReceiver.scope("getStoredTweet")
    observeClientId(
      endpoint(
        "getStoredTweet",
        GetStoredTweetHandler(
          read = localManhattanOperations.read,
          statsReceiver = stats,
        )
      ),
      stats,
      clientIdHelper,
    )
  }
  val addTweet: TweetStorageClient.AddTweet =
    endpoint(
      "addTweet",
      AddTweetHandler(
        insert = localManhattanOperations.insert,
        scribe = scribe,
        stats = statsReceiver
      )
    )
  val updateTweet: TweetStorageClient.UpdateTweet =
    endpoint2(
      "updateTweet",
      ManhattanTweetStorageClient.sanitizeTweetFields(
        UpdateTweetHandler(
          insert = localManhattanOperations.insert,
          stats = statsReceiver,
        )
      ),
      clientIdHelper,
    )
  val softDelete: TweetStorageClient.SoftDelete =
    endpoint(
      "softDelete",
      SoftDeleteHandler(
        insert = localManhattanOperations.insert,
        scribe = scribe
      )
    )
  val bounceDelete: BounceDelete =
    endpoint(
      "bounceDelete",
      BounceDeleteHandler(
        insert = localManhattanOperations.insert,
        scribe = scribe
      )
    )
  val undelete: TweetStorageClient.Undelete =
    endpoint(
      "undelete",
      UndeleteHandler(
        read = localManhattanOperations.read,
        localInsert = localManhattanOperations.insert,
        remoteInsert = remoteManhattanOperations.insert,
        delete = localManhattanOperations.delete,
        undeleteWindowHours = config.undeleteWindowHours,
        stats = statsReceiver
      )
    )
  val getDeletedTweets: TweetStorageClient.GetDeletedTweets =
    endpoint(
      "getDeletedTweets",
      GetDeletedTweetsHandler(
        read = localManhattanOperations.read,
        stats = statsReceiver
      )
    )
  val deleteAdditionalFields: TweetStorageClient.DeleteAdditionalFields =
    endpoint2(
      "deleteAdditionalFields",
      DeleteAdditionalFieldsHandler(
        delete = localManhattanOperations.delete,
        stats = statsReceiver,
      ),
      clientIdHelper,
    )
  val scrub: TweetStorageClient.Scrub =
    endpoint2(
      "scrub",
      ScrubHandler(
        insert = localManhattanOperations.insert,
        delete = localManhattanOperations.delete,
        scribe = scribe,
        stats = statsReceiver,
      ),
      clientIdHelper,
    )
  val hardDeleteTweet: HardDeleteTweet =
    endpoint(
      "hardDeleteTweet",
      HardDeleteTweetHandler(
        read = localManhattanOperations.read,
        insert = localManhattanOperations.insert,
        delete = localManhattanOperations.delete,
        scribe = scribe,
        stats = statsReceiver
      )
    )
  val ping: TweetStorageClient.Ping =
    () =>
      Stitch
        .run(
          localMhEndpoint
            .get(
              ManhattanOperations.KeyDescriptor
                .withDataset(Config.Dataset)
                .withPkey(Random.nextLong().abs)
                .withLkey(TweetKey.LKey.CoreFieldsKey), // could be any lkey
              ValueDescriptor(BufInjection)
            ).unit
        )
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala
@ -0,0 +1,30 @@
 package com.twitter.tweetypie.storage
 object Response {
  case class TweetResponse(
    tweetId: Long,
    overallResponse: TweetResponseCode,
    additionalFieldResponses: Option[Map[Short, FieldResponse]] = None)
  sealed trait TweetResponseCode
  object TweetResponseCode {
    object Success extends TweetResponseCode
    object Partial extends TweetResponseCode
    object Failure extends TweetResponseCode
    object OverCapacity extends TweetResponseCode
    object Deleted extends TweetResponseCode
  }
  case class FieldResponse(code: FieldResponseCode, message: Option[String] = None)
  sealed trait FieldResponseCode
  object FieldResponseCode {
    object Success extends FieldResponseCode
    object InvalidRequest extends FieldResponseCode
    object ValueNotFound extends FieldResponseCode
    object Timeout extends FieldResponseCode
    object Error extends FieldResponseCode
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala
@ -0,0 +1,85 @@
 package com.twitter.tweetypie.storage
 import com.twitter.servo.util.FutureEffect
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.logging._
 import com.twitter.scrooge.BinaryThriftStructSerializer
 import com.twitter.servo.util.{Scribe => ServoScribe}
 import com.twitter.tweetypie.storage_internal.thriftscala._
 import com.twitter.tbird.thriftscala.Added
 import com.twitter.tbird.thriftscala.Removed
 import com.twitter.tbird.thriftscala.Scrubbed
 import com.twitter.util.Time
 /**
 * Scribe is used to log tweet writes which are used to generate /tables/statuses in HDFS.
 *
 * Write   Scribe Category      Message
 * -----   ---------------      -------
 * add     tbird_add_status     [[com.twitter.tbird.thriftscala.Added]]
 * remove  tbird_remove_status  [[com.twitter.tbird.thriftscala.Removed]]
 * scrub   tbird_scrub_status   [[com.twitter.tbird.thriftscala.Scrubbed]]
 *
 * The thrift representation is encoded using binary thrift protocol format, followed by base64
 * encoding and converted to string using default character set (utf8). The logger uses BareFormatter.
 *
 * The thrift ops are scribed only after the write API call has succeeded.
 *
 * The class is thread safe except initial configuration and registration routines,
 * and no exception is expected unless java heap is out of memory.
 *
 * If exception does get thrown, add/remove/scrub operations will fail and
 * client will have to retry
 */
 class Scribe(factory: Scribe.ScribeHandlerFactory, statsReceiver: StatsReceiver) {
  import Scribe._
  private val AddedSerializer = BinaryThriftStructSerializer(Added)
  private val RemovedSerializer = BinaryThriftStructSerializer(Removed)
  private val ScrubbedSerializer = BinaryThriftStructSerializer(Scrubbed)
  private val addCounter = statsReceiver.counter("scribe/add/count")
  private val removeCounter = statsReceiver.counter("scribe/remove/count")
  private val scrubCounter = statsReceiver.counter("scribe/scrub/count")
  val addHandler: FutureEffect[String] = ServoScribe(factory(scribeAddedCategory)())
  val removeHandler: FutureEffect[String] = ServoScribe(factory(scribeRemovedCategory)())
  val scrubHandler: FutureEffect[String] = ServoScribe(factory(scribeScrubbedCategory)())
  private def addedToString(tweet: StoredTweet): String =
    AddedSerializer.toString(
      Added(StatusConversions.toTBirdStatus(tweet), Time.now.inMilliseconds, Some(false))
    )
  private def removedToString(id: Long, at: Time, isSoftDeleted: Boolean): String =
    RemovedSerializer.toString(Removed(id, at.inMilliseconds, Some(isSoftDeleted)))
  private def scrubbedToString(id: Long, cols: Seq[Int], at: Time): String =
    ScrubbedSerializer.toString(Scrubbed(id, cols, at.inMilliseconds))
  def logAdded(tweet: StoredTweet): Unit = {
    addHandler(addedToString(tweet))
    addCounter.incr()
  }
  def logRemoved(id: Long, at: Time, isSoftDeleted: Boolean): Unit = {
    removeHandler(removedToString(id, at, isSoftDeleted))
    removeCounter.incr()
  }
  def logScrubbed(id: Long, cols: Seq[Int], at: Time): Unit = {
    scrubHandler(scrubbedToString(id, cols, at))
    scrubCounter.incr()
  }
 }
 object Scribe {
  type ScribeHandlerFactory = (String) => HandlerFactory
  /** WARNING: These categories are white-listed. If you are changing them, the new categories should be white-listed.
   *  You should followup with CoreWorkflows team (CW) for that.
   */
  private val scribeAddedCategory = "tbird_add_status"
  private val scribeRemovedCategory = "tbird_remove_status"
  private val scribeScrubbedCategory = "tbird_scrub_status"
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala
@ -0,0 +1,71 @@
 package com.twitter.tweetypie.storage
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.stitch.Stitch
 import com.twitter.storage.client.manhattan.kv.ManhattanValue
 import com.twitter.tweetypie.storage.TweetUtils._
 import com.twitter.util.Time
 /**
 * Deletes data for the scrubbed field and writes a metadata record.
 * Provides scrub functionality. Right now, we only allow the scrubbing of the geo field.
 * It should be simple to add more fields to the allowlist if needed.
 */
 object ScrubHandler {
  val scrubFieldsAllowlist: Set[Field] = Set(Field.Geo)
  def apply(
    insert: ManhattanOperations.Insert,
    delete: ManhattanOperations.Delete,
    scribe: Scribe,
    stats: StatsReceiver
  ): TweetStorageClient.Scrub =
    (unfilteredTweetIds: Seq[TweetId], columns: Seq[Field]) => {
      val tweetIds = unfilteredTweetIds.filter(_ > 0)
      require(columns.nonEmpty, "Must specify fields to scrub")
      require(
        columns.toSet.size == columns.size,
        s"Duplicate fields to scrub specified: $columns"
      )
      require(
        columns.forall(scrubFieldsAllowlist.contains(_)),
        s"Cannot scrub $columns; scrubbable fields are restricted to $scrubFieldsAllowlist"
      )
      Stats.addWidthStat("scrub", "ids", tweetIds.size, stats)
      val mhTimestamp = Time.now
      val stitches = tweetIds.map { tweetId =>
        val deletionStitches = columns.map { field =>
          val mhKeyToDelete = TweetKey.fieldKey(tweetId, field.id)
          delete(mhKeyToDelete, Some(mhTimestamp)).liftToTry
        }
        val collectedStitch =
          Stitch.collect(deletionStitches).map(collectWithRateLimitCheck).lowerFromTry
        collectedStitch
          .flatMap { _ =>
            val scrubbedStitches = columns.map { column =>
              val scrubbedKey = TweetKey.scrubbedFieldKey(tweetId, column.id)
              val record =
                TweetManhattanRecord(
                  scrubbedKey,
                  ManhattanValue(StringCodec.toByteBuffer(""), Some(mhTimestamp))
                )
              insert(record).liftToTry
            }
            Stitch.collect(scrubbedStitches)
          }
          .map(collectWithRateLimitCheck)
      }
      Stitch.collect(stitches).map(collectWithRateLimitCheck).lowerFromTry.onSuccess { _ =>
        tweetIds.foreach { id => scribe.logScrubbed(id, columns.map(_.id.toInt), mhTimestamp) }
      }
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala
@ -0,0 +1,20 @@
 package com.twitter.tweetypie.storage
 import com.twitter.util.Time
 object SoftDeleteHandler {
  def apply(
    insert: ManhattanOperations.Insert,
    scribe: Scribe
  ): TweetStorageClient.SoftDelete =
    tweetId => {
      val mhTimestamp = Time.now
      val softDeleteRecord = TweetStateRecord
        .SoftDeleted(tweetId, mhTimestamp.inMillis)
        .toTweetMhRecord
      insert(softDeleteRecord).onSuccess { _ =>
        scribe.logRemoved(tweetId, mhTimestamp, isSoftDeleted = true)
      }
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala
@ -0,0 +1,33 @@
 package com.twitter.tweetypie.storage
 import com.twitter.finagle.stats.StatsReceiver
 object Stats {
  // These two methods below (addWidthStat and updatePerFieldQpsCounters) are called per RPC call for most APIs,
  // so we rely on the stats receiver that is passed in to the library to do memoization.
  private[storage] def addWidthStat(
    rpcName: String,
    paramName: String,
    width: Int,
    stats: StatsReceiver
  ): Unit =
    getStat(rpcName, paramName, stats).add(width)
  // Updates the counters for each Additional field. The idea here is to expose the QPS for each
  // additional field
  private[storage] def updatePerFieldQpsCounters(
    rpcName: String,
    fieldIds: Seq[FieldId],
    count: Int,
    stats: StatsReceiver
  ): Unit = {
    fieldIds.foreach { fieldId => getCounter(rpcName, fieldId, stats).incr(count) }
  }
  private def getCounter(rpcName: String, fieldId: FieldId, stats: StatsReceiver) =
    stats.scope(rpcName, "fields", fieldId.toString).counter("count")
  private def getStat(rpcName: String, paramName: String, stats: StatsReceiver) =
    stats.scope(rpcName, paramName).stat("width")
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala
@ -0,0 +1,129 @@
 package com.twitter.tweetypie.storage
 import com.twitter.tweetypie.storage_internal.thriftscala._
 import com.twitter.tbird.{thriftscala => tbird}
 object StatusConversions {
  /**
   * This is used only in Scribe.scala, when scribing to tbird_add_status
   * Once we remove that, we can also remove this.
   */
  def toTBirdStatus(tweet: StoredTweet): tbird.Status =
    tbird.Status(
      id = tweet.id,
      userId = tweet.userId.get,
      text = tweet.text.get,
      createdVia = tweet.createdVia.get,
      createdAtSec = tweet.createdAtSec.get,
      reply = tweet.reply.map(toTBirdReply),
      share = tweet.share.map(toTBirdShare),
      contributorId = tweet.contributorId,
      geo = tweet.geo.map(toTBirdGeo),
      hasTakedown = tweet.hasTakedown.getOrElse(false),
      nsfwUser = tweet.nsfwUser.getOrElse(false),
      nsfwAdmin = tweet.nsfwAdmin.getOrElse(false),
      media = tweet.media.map(_.map(toTBirdMedia)).getOrElse(Seq()),
      narrowcast = tweet.narrowcast.map(toTBirdNarrowcast),
      nullcast = tweet.nullcast.getOrElse(false),
      trackingId = tweet.trackingId
    )
  /**
   * This is only used in a test, to verify that the above method `toTBirdStatus`
   * works, so we can't remove it as long as the above method exists.
   */
  def fromTBirdStatus(status: tbird.Status): StoredTweet = {
    StoredTweet(
      id = status.id,
      userId = Some(status.userId),
      text = Some(status.text),
      createdVia = Some(status.createdVia),
      createdAtSec = Some(status.createdAtSec),
      reply = status.reply.map(fromTBirdReply),
      share = status.share.map(fromTBirdShare),
      contributorId = status.contributorId,
      geo = status.geo.map(fromTBirdGeo),
      hasTakedown = Some(status.hasTakedown),
      nsfwUser = Some(status.nsfwUser),
      nsfwAdmin = Some(status.nsfwAdmin),
      media = Some(status.media.map(fromTBirdMedia)),
      narrowcast = status.narrowcast.map(fromTBirdNarrowcast),
      nullcast = Some(status.nullcast),
      trackingId = status.trackingId
    )
  }
  private def fromTBirdReply(reply: tbird.Reply): StoredReply =
    StoredReply(
      inReplyToStatusId = reply.inReplyToStatusId,
      inReplyToUserId = reply.inReplyToUserId
    )
  private def fromTBirdShare(share: tbird.Share): StoredShare =
    StoredShare(
      sourceStatusId = share.sourceStatusId,
      sourceUserId = share.sourceUserId,
      parentStatusId = share.parentStatusId
    )
  private def fromTBirdGeo(geo: tbird.Geo): StoredGeo =
    StoredGeo(
      latitude = geo.latitude,
      longitude = geo.longitude,
      geoPrecision = geo.geoPrecision,
      entityId = geo.entityId
    )
  private def fromTBirdMedia(media: tbird.MediaEntity): StoredMediaEntity =
    StoredMediaEntity(
      id = media.id,
      mediaType = media.mediaType,
      width = media.width,
      height = media.height
    )
  private def fromTBirdNarrowcast(narrowcast: tbird.Narrowcast): StoredNarrowcast =
    StoredNarrowcast(
      language = Some(narrowcast.language),
      location = Some(narrowcast.location),
      ids = Some(narrowcast.ids)
    )
  private def toTBirdReply(reply: StoredReply): tbird.Reply =
    tbird.Reply(
      inReplyToStatusId = reply.inReplyToStatusId,
      inReplyToUserId = reply.inReplyToUserId
    )
  private def toTBirdShare(share: StoredShare): tbird.Share =
    tbird.Share(
      sourceStatusId = share.sourceStatusId,
      sourceUserId = share.sourceUserId,
      parentStatusId = share.parentStatusId
    )
  private def toTBirdGeo(geo: StoredGeo): tbird.Geo =
    tbird.Geo(
      latitude = geo.latitude,
      longitude = geo.longitude,
      geoPrecision = geo.geoPrecision,
      entityId = geo.entityId,
      name = geo.name
    )
  private def toTBirdMedia(media: StoredMediaEntity): tbird.MediaEntity =
    tbird.MediaEntity(
      id = media.id,
      mediaType = media.mediaType,
      width = media.width,
      height = media.height
    )
  private def toTBirdNarrowcast(narrowcast: StoredNarrowcast): tbird.Narrowcast =
    tbird.Narrowcast(
      language = narrowcast.language.getOrElse(Nil),
      location = narrowcast.location.getOrElse(Nil),
      ids = narrowcast.ids.getOrElse(Nil)
    )
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala
@ -0,0 +1,346 @@
 package com.twitter.tweetypie.storage
 import com.twitter.mediaservices.commons.tweetmedia.thriftscala._
 import com.twitter.scrooge.TFieldBlob
 import com.twitter.tweetypie.additionalfields.AdditionalFields
 import com.twitter.tweetypie.storage_internal.thriftscala._
 import com.twitter.tweetypie.thriftscala._
 import com.twitter.tweetypie.util.TweetLenses
 object StorageConversions {
  private val tbTweetCompiledAdditionalFieldIds =
    StoredTweet.metaData.fields.map(_.id).filter(AdditionalFields.isAdditionalFieldId)
  def toStoredReply(reply: Reply, conversationId: Option[TweetId]): StoredReply =
    StoredReply(
      inReplyToStatusId = reply.inReplyToStatusId.getOrElse(0),
      inReplyToUserId = reply.inReplyToUserId,
      conversationId = conversationId
    )
  def toStoredShare(share: Share): StoredShare =
    StoredShare(
      share.sourceStatusId,
      share.sourceUserId,
      share.parentStatusId
    )
  def toStoredQuotedTweet(qt: QuotedTweet, text: String): Option[StoredQuotedTweet] =
    qt.permalink
      .filterNot { p =>
        text.contains(p.shortUrl)
      } // omit StoredQuotedTweet when url already in text
      .map { p =>
        StoredQuotedTweet(
          qt.tweetId,
          qt.userId,
          p.shortUrl
        )
      }
  def toStoredGeo(tweet: Tweet): Option[StoredGeo] =
    TweetLenses.geoCoordinates.get(tweet) match {
      case None =>
        TweetLenses.placeId.get(tweet) match {
          case None => None
          case Some(placeId) =>
            Some(
              StoredGeo(
                latitude = 0.0,
                longitude = 0.0,
                geoPrecision = 0,
                entityId = 0,
                name = Some(placeId)
              )
            )
        }
      case Some(coords) =>
        Some(
          StoredGeo(
            latitude = coords.latitude,
            longitude = coords.longitude,
            geoPrecision = coords.geoPrecision,
            entityId = if (coords.display) 2 else 0,
            name = TweetLenses.placeId.get(tweet)
          )
        )
    }
  def toStoredMedia(mediaList: Seq[MediaEntity]): Seq[StoredMediaEntity] =
    mediaList.filter(_.sourceStatusId.isEmpty).flatMap(toStoredMediaEntity)
  def toStoredMediaEntity(media: MediaEntity): Option[StoredMediaEntity] =
    media.sizes.find(_.sizeType == MediaSizeType.Orig).map { origSize =>
      StoredMediaEntity(
        id = media.mediaId,
        mediaType = origSize.deprecatedContentType.value.toByte,
        width = origSize.width.toShort,
        height = origSize.height.toShort
      )
    }
  // The language and ids fields are for compatibility with existing tweets stored in manhattan.
  def toStoredNarrowcast(narrowcast: Narrowcast): StoredNarrowcast =
    StoredNarrowcast(
      language = Some(Seq.empty),
      location = Some(narrowcast.location),
      ids = Some(Seq.empty)
    )
  def toStoredAdditionalFields(from: Seq[TFieldBlob], to: StoredTweet): StoredTweet =
    from.foldLeft(to) { case (t, f) => t.setField(f) }
  def toStoredAdditionalFields(from: Tweet, to: StoredTweet): StoredTweet =
    toStoredAdditionalFields(AdditionalFields.additionalFields(from), to)
  def toStoredTweet(tweet: Tweet): StoredTweet = {
    val storedTweet =
      StoredTweet(
        id = tweet.id,
        userId = Some(TweetLenses.userId(tweet)),
        text = Some(TweetLenses.text(tweet)),
        createdVia = Some(TweetLenses.createdVia(tweet)),
        createdAtSec = Some(TweetLenses.createdAt(tweet)),
        reply =
          TweetLenses.reply(tweet).map { r => toStoredReply(r, TweetLenses.conversationId(tweet)) },
        share = TweetLenses.share(tweet).map(toStoredShare),
        contributorId = tweet.contributor.map(_.userId),
        geo = toStoredGeo(tweet),
        hasTakedown = Some(TweetLenses.hasTakedown(tweet)),
        nsfwUser = Some(TweetLenses.nsfwUser(tweet)),
        nsfwAdmin = Some(TweetLenses.nsfwAdmin(tweet)),
        media = tweet.media.map(toStoredMedia),
        narrowcast = TweetLenses.narrowcast(tweet).map(toStoredNarrowcast),
        nullcast = Some(TweetLenses.nullcast(tweet)),
        trackingId = TweetLenses.trackingId(tweet),
        quotedTweet = TweetLenses.quotedTweet(tweet).flatMap { qt =>
          toStoredQuotedTweet(qt, TweetLenses.text(tweet))
        }
      )
    toStoredAdditionalFields(tweet, storedTweet)
  }
  /**
   * Does not need core data to be set. Constructs on disk tweet by avoiding the TweetLenses object
   * and only extracting the specified fields.
   *
   * NOTE: Assumes that specified fields are set in the tweet.
   *
   * @param tpTweet Tweetypie Tweet to be converted
   * @param fields the fields to be populated in the on disk Tweet
   *
   * @return an on disk Tweet which has only the specified fields set
   */
  def toStoredTweetForFields(tpTweet: Tweet, fields: Set[Field]): StoredTweet = {
    // Make sure all the passed in fields are known or additional fields
    require(
      (fields -- Field.AllUpdatableCompiledFields)
        .forall(field => AdditionalFields.isAdditionalFieldId(field.id))
    )
    val storedTweet =
      StoredTweet(
        id = tpTweet.id,
        geo = if (fields.contains(Field.Geo)) {
          tpTweet.coreData.get.coordinates match {
            case None =>
              tpTweet.coreData.get.placeId match {
                case None => None
                case Some(placeId) =>
                  Some(
                    StoredGeo(
                      latitude = 0.0,
                      longitude = 0.0,
                      geoPrecision = 0,
                      entityId = 0,
                      name = Some(placeId)
                    )
                  )
              }
            case Some(coords) =>
              Some(
                StoredGeo(
                  latitude = coords.latitude,
                  longitude = coords.longitude,
                  geoPrecision = coords.geoPrecision,
                  entityId = if (coords.display) 2 else 0,
                  name = tpTweet.coreData.get.placeId
                )
              )
          }
        } else {
          None
        },
        hasTakedown =
          if (fields.contains(Field.HasTakedown))
            Some(tpTweet.coreData.get.hasTakedown)
          else
            None,
        nsfwUser =
          if (fields.contains(Field.NsfwUser))
            Some(tpTweet.coreData.get.nsfwUser)
          else
            None,
        nsfwAdmin =
          if (fields.contains(Field.NsfwAdmin))
            Some(tpTweet.coreData.get.nsfwAdmin)
          else
            None
      )
    if (fields.map(_.id).exists(AdditionalFields.isAdditionalFieldId))
      toStoredAdditionalFields(tpTweet, storedTweet)
    else
      storedTweet
  }
  def fromStoredReply(reply: StoredReply): Reply =
    Reply(
      Some(reply.inReplyToStatusId).filter(_ > 0),
      reply.inReplyToUserId
    )
  def fromStoredShare(share: StoredShare): Share =
    Share(
      share.sourceStatusId,
      share.sourceUserId,
      share.parentStatusId
    )
  def fromStoredQuotedTweet(qt: StoredQuotedTweet): QuotedTweet =
    QuotedTweet(
      qt.tweetId,
      qt.userId,
      Some(
        ShortenedUrl(
          shortUrl = qt.shortUrl,
          longUrl = "", // will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator
          displayText = "" //will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator
        )
      )
    )
  def fromStoredGeo(geo: StoredGeo): GeoCoordinates =
    GeoCoordinates(
      latitude = geo.latitude,
      longitude = geo.longitude,
      geoPrecision = geo.geoPrecision,
      display = geo.entityId == 2
    )
  def fromStoredMediaEntity(media: StoredMediaEntity): MediaEntity =
    MediaEntity(
      fromIndex = -1, // will get filled in later
      toIndex = -1, // will get filled in later
      url = null, // will get filled in later
      mediaPath = "", // field is obsolete
      mediaUrl = null, // will get filled in later
      mediaUrlHttps = null, // will get filled in later
      displayUrl = null, // will get filled in later
      expandedUrl = null, // will get filled in later
      mediaId = media.id,
      nsfw = false,
      sizes = Set(
        MediaSize(
          sizeType = MediaSizeType.Orig,
          resizeMethod = MediaResizeMethod.Fit,
          deprecatedContentType = MediaContentType(media.mediaType),
          width = media.width,
          height = media.height
        )
      )
    )
  def fromStoredNarrowcast(narrowcast: StoredNarrowcast): Narrowcast =
    Narrowcast(
      location = narrowcast.location.getOrElse(Seq())
    )
  def fromStoredTweet(storedTweet: StoredTweet): Tweet = {
    val coreData =
      TweetCoreData(
        userId = storedTweet.userId.get,
        text = storedTweet.text.get,
        createdVia = storedTweet.createdVia.get,
        createdAtSecs = storedTweet.createdAtSec.get,
        reply = storedTweet.reply.map(fromStoredReply),
        share = storedTweet.share.map(fromStoredShare),
        hasTakedown = storedTweet.hasTakedown.getOrElse(false),
        nsfwUser = storedTweet.nsfwUser.getOrElse(false),
        nsfwAdmin = storedTweet.nsfwAdmin.getOrElse(false),
        narrowcast = storedTweet.narrowcast.map(fromStoredNarrowcast),
        nullcast = storedTweet.nullcast.getOrElse(false),
        trackingId = storedTweet.trackingId,
        conversationId = storedTweet.reply.flatMap(_.conversationId),
        placeId = storedTweet.geo.flatMap(_.name),
        coordinates = storedTweet.geo.map(fromStoredGeo),
        hasMedia = if (storedTweet.media.exists(_.nonEmpty)) Some(true) else None
      )
    // retweets should never have their media, but some tweets incorrectly do.
    val storedMedia = if (coreData.share.isDefined) Nil else storedTweet.media.toSeq
    val tpTweet =
      Tweet(
        id = storedTweet.id,
        coreData = Some(coreData),
        contributor = storedTweet.contributorId.map(Contributor(_)),
        media = Some(storedMedia.flatten.map(fromStoredMediaEntity)),
        mentions = Some(Seq.empty),
        urls = Some(Seq.empty),
        cashtags = Some(Seq.empty),
        hashtags = Some(Seq.empty),
        quotedTweet = storedTweet.quotedTweet.map(fromStoredQuotedTweet)
      )
    fromStoredAdditionalFields(storedTweet, tpTweet)
  }
  def fromStoredTweetAllowInvalid(storedTweet: StoredTweet): Tweet = {
    fromStoredTweet(
      storedTweet.copy(
        userId = storedTweet.userId.orElse(Some(-1L)),
        text = storedTweet.text.orElse(Some("")),
        createdVia = storedTweet.createdVia.orElse(Some("")),
        createdAtSec = storedTweet.createdAtSec.orElse(Some(-1L))
      ))
  }
  def fromStoredAdditionalFields(from: StoredTweet, to: Tweet): Tweet = {
    val passThroughAdditionalFields =
      from._passthroughFields.filterKeys(AdditionalFields.isAdditionalFieldId)
    val allAdditionalFields =
      from.getFieldBlobs(tbTweetCompiledAdditionalFieldIds) ++ passThroughAdditionalFields
    allAdditionalFields.values.foldLeft(to) { case (t, f) => t.setField(f) }
  }
  def toDeletedTweet(storedTweet: StoredTweet): DeletedTweet = {
    val noteTweetBlob = storedTweet.getFieldBlob(Tweet.NoteTweetField.id)
    val noteTweetOption = noteTweetBlob.map(blob => NoteTweet.decode(blob.read))
    DeletedTweet(
      id = storedTweet.id,
      userId = storedTweet.userId,
      text = storedTweet.text,
      createdAtSecs = storedTweet.createdAtSec,
      share = storedTweet.share.map(toDeletedShare),
      media = storedTweet.media.map(_.map(toDeletedMediaEntity)),
      noteTweetId = noteTweetOption.map(_.id),
      isExpandable = noteTweetOption.flatMap(_.isExpandable)
    )
  }
  def toDeletedShare(storedShare: StoredShare): DeletedTweetShare =
    DeletedTweetShare(
      sourceStatusId = storedShare.sourceStatusId,
      sourceUserId = storedShare.sourceUserId,
      parentStatusId = storedShare.parentStatusId
    )
  def toDeletedMediaEntity(storedMediaEntity: StoredMediaEntity): DeletedTweetMediaEntity =
    DeletedTweetMediaEntity(
      id = storedMediaEntity.id,
      mediaType = storedMediaEntity.mediaType,
      width = storedMediaEntity.width,
      height = storedMediaEntity.height
    )
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala
@ -0,0 +1,92 @@
 package com.twitter.tweetypie.storage
 import com.twitter.util.Return
 import com.twitter.util.Throw
 import com.twitter.util.Time
 import com.twitter.util.Try
 import java.util.Arrays
 import scala.util.control.NoStackTrace
 import scala.util.control.NonFatal
 sealed abstract class TimestampType(val keyName: String)
 object TimestampType {
  object Default extends TimestampType("timestamp")
  object SoftDelete extends TimestampType("softdelete_timestamp")
 }
 /**
 * TimestampDecoder gets the timestamps associated with state records. The Manhattan timestamp is
 * used for legacy records (with value "1"), otherwise the timestamp is extracted from the
 * JSON value.
 *
 * See "Metadata" in README.md for further information about state records.
 */
 object TimestampDecoder {
  case class UnparsableJson(msg: String, t: Throwable) extends Exception(msg, t) with NoStackTrace
  case class MissingJsonTimestamp(msg: String) extends Exception(msg) with NoStackTrace
  case class UnexpectedJsonValue(msg: String) extends Exception(msg) with NoStackTrace
  case class MissingManhattanTimestamp(msg: String) extends Exception(msg) with NoStackTrace
  private[storage] val LegacyValue: Array[Byte] = Array('1')
  /**
   * The first backfill of tweet data to Manhattan supplied timestamps in milliseconds where
   * nanoseconds were expected. The result is that some values have an incorrect Manhattan
   * timestamp. For these bad timestamps, time.inNanoseconds is actually milliseconds.
   *
   * For example, the deletion record for tweet 22225781 has Manhattan timestamp 1970-01-01 00:23:24 +0000.
   * Contrast with the deletion record for tweet 435404491999813632 with Manhattan timestamp 2014-11-09 14:24:04 +0000
   *
   * This threshold value comes from the last time in milliseconds that was interpreted
   * as nanoseconds, e.g. Time.fromNanoseconds(1438387200000L) == 1970-01-01 00:23:58 +0000
   */
  private[storage] val BadTimestampThreshold = Time.at("1970-01-01 00:23:58 +0000")
  def decode(record: TweetManhattanRecord, tsType: TimestampType): Try[Long] =
    decode(record.value, tsType)
  def decode(mhValue: TweetManhattanValue, tsType: TimestampType): Try[Long] = {
    val value = ByteArrayCodec.fromByteBuffer(mhValue.contents)
    if (isLegacyRecord(value)) {
      nativeManhattanTimestamp(mhValue)
    } else {
      jsonTimestamp(value, tsType)
    }
  }
  private def isLegacyRecord(value: Array[Byte]) = Arrays.equals(value, LegacyValue)
  private def nativeManhattanTimestamp(mhValue: TweetManhattanValue): Try[Long] =
    mhValue.timestamp match {
      case Some(ts) => Return(correctedTimestamp(ts))
      case None =>
        Throw(MissingManhattanTimestamp(s"Manhattan timestamp missing in value $mhValue"))
    }
  private def jsonTimestamp(value: Array[Byte], tsType: TimestampType): Try[Long] =
    Try { Json.decode(value) }
      .rescue { case NonFatal(e) => Throw(UnparsableJson(e.getMessage, e)) }
      .flatMap { m =>
        m.get(tsType.keyName) match {
          case Some(v) =>
            v match {
              case l: Long => Return(l)
              case i: Integer => Return(i.toLong)
              case _ =>
                Throw(
                  UnexpectedJsonValue(s"Unexpected value for ${tsType.keyName} in record data $m")
                )
            }
          case None =>
            Throw(MissingJsonTimestamp(s"Missing key ${tsType.keyName} in record data $m"))
        }
      }
  def correctedTime(t: Time): Time =
    if (t < BadTimestampThreshold) Time.fromMilliseconds(t.inNanoseconds) else t
  def correctedTime(t: Long): Time = correctedTime(Time.fromNanoseconds(t))
  def correctedTimestamp(t: Time): Long =
    if (t < BadTimestampThreshold) t.inNanoseconds else t.inMilliseconds
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala
@ -0,0 +1,164 @@
 package com.twitter.tweetypie.storage
 /**
 * Responsible for encoding/decoding Tweet records to/from Manhattan keys
 *
 * K/V Scheme:
 * -----------
 *      [TweetId]
 *           /metadata
 *               /delete_state (a.k.a. hard delete)
 *               /soft_delete_state
 *               /bounce_delete_state
 *               /undelete_state
 *               /force_added_state
 *               /scrubbed_fields/
 *                    /[ScrubbedFieldId_1]
 *                     ..
 *                   /[ScrubbedFieldId_M]
 *          /fields
 *             /internal
 *                 /1
 *                 /9
 *                 ..
 *                 /99
 *             /external
 *                 /100
 *                 ..
 *
 * IMPORTANT NOTE:
 * 1) Field Ids 2 to 8 in Tweet thrift struct are considered "core fields" are 'packed' together
 *    into a TFieldBlob and stored under field id 1 (i.e [DatasetName]/[TweetId]/fields/internal/1).
 *    This is why we do not see keys from [DatasetName]/[TweetId]/fields/internal/2 to [DatasetName]/
 *    [TweetId]/fields/internal/8)
 *
 * 2) Also, the tweet id (which is the field id 1 in Tweet thrift structure) is not explicitly stored
 *    in Manhattan. There is no need to explicitly store it since it is a part of the Pkey
 */
 case class TweetKey(tweetId: TweetId, lKey: TweetKey.LKey) {
  override def toString: String =
    s"/${ManhattanOperations.PkeyInjection(tweetId)}/${ManhattanOperations.LkeyInjection(lKey)}"
 }
 object TweetKey {
  // Manhattan uses lexicographical order for keys. To make sure lexicographical order matches the
  // numerical order, we should pad both tweet id and field ids with leading zeros.
  // Since tweet id is long and field id is a short, the max width of each can be obtained by doing
  // Long.MaxValue.toString.length and Short.MaxValue.toString.length respectively
  private val TweetIdFormatStr = s"%0${Long.MaxValue.toString.length}d"
  private val FieldIdFormatStr = s"%0${Short.MaxValue.toString.length}d"
  private[storage] def padTweetIdStr(tweetId: Long): String = TweetIdFormatStr.format(tweetId)
  private[storage] def padFieldIdStr(fieldId: Short): String = FieldIdFormatStr.format(fieldId)
  def coreFieldsKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.CoreFieldsKey)
  def hardDeletionStateKey(tweetId: TweetId): TweetKey =
    TweetKey(tweetId, LKey.HardDeletionStateKey)
  def softDeletionStateKey(tweetId: TweetId): TweetKey =
    TweetKey(tweetId, LKey.SoftDeletionStateKey)
  def bounceDeletionStateKey(tweetId: TweetId): TweetKey =
    TweetKey(tweetId, LKey.BounceDeletionStateKey)
  def unDeletionStateKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.UnDeletionStateKey)
  def forceAddedStateKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.ForceAddedStateKey)
  def scrubbedGeoFieldKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.ScrubbedGeoFieldKey)
  def fieldKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
    TweetKey(tweetId, LKey.FieldKey(fieldId))
  def internalFieldsKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
    TweetKey(tweetId, LKey.InternalFieldsKey(fieldId))
  def additionalFieldsKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
    TweetKey(tweetId, LKey.AdditionalFieldsKey(fieldId))
  def scrubbedFieldKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
    TweetKey(tweetId, LKey.ScrubbedFieldKey(fieldId))
  // AllFieldsKeyPrefix:       fields
  // CoreFieldsKey:            fields/internal/1  (Stores subset of StoredTweet fields which are
  //                             "packed" into a single CoreFields record)
  // HardDeletionStateKey:     metadata/delete_state
  // SoftDeletionStateKey:     metadata/soft_delete_state
  // BounceDeletionStateKey:   metadata/bounce_delete_state
  // UnDeletionStateKey:       metadata/undelete_state
  // ForceAddedStateKey:       metadata/force_added_state
  // FieldKey:                 fields/<group_name>/<padded_field_id> (where <group_name>
  //                             is 'internal' for field ids < 100 and 'external' for all other
  //                             fields ids)
  // InternalFieldsKeyPrefix:  fields/internal
  // PKey:                     <empty string>
  // ScrubbedFieldKey:         metadata/scrubbed_fields/<padded_field_id>
  // ScrubbedFieldKeyPrefix:   metadata/scrubbed_fields
  sealed abstract class LKey(override val toString: String)
  object LKey {
    private val HardDeletionRecordLiteral = "delete_state"
    private val SoftDeletionRecordLiteral = "soft_delete_state"
    private val BounceDeletionRecordLiteral = "bounce_delete_state"
    private val UnDeletionRecordLiteral = "undelete_state"
    private val ForceAddRecordLiteral = "force_added_state"
    private val ScrubbedFieldsGroup = "scrubbed_fields"
    private val InternalFieldsGroup = "internal"
    private val ExternalFieldsGroup = "external"
    private val MetadataCategory = "metadata"
    private val FieldsCategory = "fields"
    private val InternalFieldsKeyPrefix = s"$FieldsCategory/$InternalFieldsGroup/"
    private val ExternalFieldsKeyPrefix = s"$FieldsCategory/$ExternalFieldsGroup/"
    private val ScrubbedFieldsKeyPrefix = s"$MetadataCategory/$ScrubbedFieldsGroup/"
    sealed abstract class MetadataKey(metadataType: String)
        extends LKey(s"$MetadataCategory/$metadataType")
    sealed abstract class StateKey(stateType: String) extends MetadataKey(stateType)
    case object HardDeletionStateKey extends StateKey(s"$HardDeletionRecordLiteral")
    case object SoftDeletionStateKey extends StateKey(s"$SoftDeletionRecordLiteral")
    case object BounceDeletionStateKey extends StateKey(s"$BounceDeletionRecordLiteral")
    case object UnDeletionStateKey extends StateKey(s"$UnDeletionRecordLiteral")
    case object ForceAddedStateKey extends StateKey(s"$ForceAddRecordLiteral")
    case class ScrubbedFieldKey(fieldId: FieldId)
        extends MetadataKey(s"$ScrubbedFieldsGroup/${padFieldIdStr(fieldId)}")
    val ScrubbedGeoFieldKey: LKey.ScrubbedFieldKey = ScrubbedFieldKey(TweetFields.geoFieldId)
    /**
     * LKey that has one of many possible fields id. This generalize over
     * internal and additional fields key.
     */
    sealed abstract class FieldKey(prefix: String) extends LKey(toString) {
      def fieldId: FieldId
      override val toString: String = prefix + padFieldIdStr(fieldId)
    }
    object FieldKey {
      def apply(fieldId: FieldId): FieldKey =
        fieldId match {
          case id if id < TweetFields.firstAdditionalFieldId => InternalFieldsKey(fieldId)
          case _ => AdditionalFieldsKey(fieldId)
        }
    }
    case class InternalFieldsKey(fieldId: FieldId) extends FieldKey(InternalFieldsKeyPrefix) {
      assert(fieldId < TweetFields.firstAdditionalFieldId)
    }
    case class AdditionalFieldsKey(fieldId: FieldId) extends FieldKey(ExternalFieldsKeyPrefix) {
      assert(fieldId >= TweetFields.firstAdditionalFieldId)
    }
    val CoreFieldsKey: LKey.InternalFieldsKey = InternalFieldsKey(TweetFields.rootCoreFieldId)
    case class Unknown private (str: String) extends LKey(str)
    def fromString(str: String): LKey = {
      def extractFieldId(prefix: String): FieldId =
        str.slice(prefix.length, str.length).toShort
      str match {
        case CoreFieldsKey.toString => CoreFieldsKey
        case HardDeletionStateKey.toString => HardDeletionStateKey
        case SoftDeletionStateKey.toString => SoftDeletionStateKey
        case BounceDeletionStateKey.toString => BounceDeletionStateKey
        case UnDeletionStateKey.toString => UnDeletionStateKey
        case ForceAddedStateKey.toString => ForceAddedStateKey
        case ScrubbedGeoFieldKey.toString => ScrubbedGeoFieldKey
        case _ if str.startsWith(InternalFieldsKeyPrefix) =>
          InternalFieldsKey(extractFieldId(InternalFieldsKeyPrefix))
        case _ if str.startsWith(ExternalFieldsKeyPrefix) =>
          AdditionalFieldsKey(extractFieldId(ExternalFieldsKeyPrefix))
        case _ if str.startsWith(ScrubbedFieldsKeyPrefix) =>
          ScrubbedFieldKey(extractFieldId(ScrubbedFieldsKeyPrefix))
        case _ => Unknown(str)
      }
    }
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala
@ -0,0 +1,90 @@
 package com.twitter.tweetypie.storage
 import com.twitter.storage.client.manhattan.kv.ManhattanValue
 import com.twitter.util.Time
 /**
 * A [[TweetStateRecord]] represents an action taken on a tweet and can be used to determine a tweet's state.
 *
 * The state is determined by the record with the most recent timestamp. In the absence of any
 * record a tweet is considered found, which is to say the tweet has not been through the
 * deletion process.
 *
 * The [[TweetStateRecord]] type is determined by the lkey of a tweet manhattan record:
 *    metadata/delete_state      -> HardDeleted
 *    metadata/soft_delete_state -> SoftDeleted
 *    metadata/undelete_state    -> Undeleted
 *    metadata/force_added_state -> ForceAdded
 *
 * See the README in this directory for more details about the state of a tweet.
 */
 sealed trait TweetStateRecord {
  def tweetId: TweetId
  def createdAt: Long
  def stateKey: TweetKey.LKey.StateKey
  def values: Map[String, Long] = Map("timestamp" -> createdAt)
  def name: String
  def toTweetMhRecord: TweetManhattanRecord = {
    val valByteBuffer = ByteArrayCodec.toByteBuffer(Json.encode(values))
    val value = ManhattanValue(valByteBuffer, Some(Time.fromMilliseconds(createdAt)))
    TweetManhattanRecord(TweetKey(tweetId, stateKey), value)
  }
 }
 object TweetStateRecord {
  /** When a soft-deleted or bounce deleted tweet is ultimately hard-deleted by an offline job. */
  case class HardDeleted(tweetId: TweetId, createdAt: Long, deletedAt: Long)
      extends TweetStateRecord {
    // timestamp in the mh backend is the hard deletion timestamp
    override def values = Map("timestamp" -> createdAt, "softdelete_timestamp" -> deletedAt)
    def stateKey = TweetKey.LKey.HardDeletionStateKey
    def name = "hard_deleted"
  }
  /** When a tweet is deleted by the user. It can still be undeleted while in the soft deleted state. */
  case class SoftDeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
    def stateKey = TweetKey.LKey.SoftDeletionStateKey
    def name = "soft_deleted"
  }
  /** When a tweet is deleted by go/bouncer for violating Twitter Rules. It MAY NOT be undeleted. */
  case class BounceDeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
    def stateKey = TweetKey.LKey.BounceDeletionStateKey
    def name = "bounce_deleted"
  }
  /** When a tweet is undeleted by an internal system. */
  case class Undeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
    def stateKey = TweetKey.LKey.UnDeletionStateKey
    def name = "undeleted"
  }
  /** When a tweet is created using the forceAdd endpoint. */
  case class ForceAdded(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
    def stateKey = TweetKey.LKey.ForceAddedStateKey
    def name = "force_added"
  }
  def fromTweetMhRecord(record: TweetManhattanRecord): Option[TweetStateRecord] = {
    def ts = TimestampDecoder.decode(record, TimestampType.Default).getOrElse(0L)
    def sdts = TimestampDecoder.decode(record, TimestampType.SoftDelete).getOrElse(0L)
    def tweetId = record.pkey
    record.lkey match {
      case TweetKey.LKey.HardDeletionStateKey => Some(HardDeleted(tweetId, ts, sdts))
      case TweetKey.LKey.SoftDeletionStateKey => Some(SoftDeleted(tweetId, ts))
      case TweetKey.LKey.BounceDeletionStateKey => Some(BounceDeleted(tweetId, ts))
      case TweetKey.LKey.UnDeletionStateKey => Some(Undeleted(tweetId, ts))
      case TweetKey.LKey.ForceAddedStateKey => Some(ForceAdded(tweetId, ts))
      case _ => None
    }
  }
  def fromTweetMhRecords(records: Seq[TweetManhattanRecord]): Seq[TweetStateRecord] =
    records.flatMap(fromTweetMhRecord)
  def mostRecent(records: Seq[TweetManhattanRecord]): Option[TweetStateRecord] =
    fromTweetMhRecords(records).sortBy(_.createdAt).lastOption
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala
@ -0,0 +1,201 @@
 package com.twitter.tweetypie.storage
 import com.twitter.stitch.Stitch
 import com.twitter.tweetypie.storage.Response.TweetResponse
 import com.twitter.tweetypie.thriftscala.Tweet
 import com.twitter.util.Future
 /**
 * Interface for reading and writing tweet data in Manhattan
 */
 trait TweetStorageClient {
  import TweetStorageClient._
  def addTweet: AddTweet
  def deleteAdditionalFields: DeleteAdditionalFields
  def getTweet: GetTweet
  def getStoredTweet: GetStoredTweet
  def getDeletedTweets: GetDeletedTweets
  def undelete: Undelete
  def updateTweet: UpdateTweet
  def scrub: Scrub
  def softDelete: SoftDelete
  def bounceDelete: BounceDelete
  def hardDeleteTweet: HardDeleteTweet
  def ping: Ping
 }
 object TweetStorageClient {
  type GetTweet = TweetId => Stitch[GetTweet.Response]
  object GetTweet {
    sealed trait Response
    object Response {
      case class Found(tweet: Tweet) extends Response
      object NotFound extends Response
      object Deleted extends Response
      // On BounceDeleted, provide the full Tweet so that implementations
      // (i.e. ManhattanTweetStorageClient) don't not need to be aware of the specific tweet
      // fields required by callers for proper processing of bounced deleted tweets.
      case class BounceDeleted(tweet: Tweet) extends Response
    }
  }
  type GetStoredTweet = TweetId => Stitch[GetStoredTweet.Response]
  object GetStoredTweet {
    sealed abstract class Error(val message: String) {
      override def toString: String = message
    }
    object Error {
      case object TweetIsCorrupt extends Error("stored tweet data is corrupt and cannot be decoded")
      case object ScrubbedFieldsPresent
          extends Error("stored tweet fields that should be scrubbed are still present")
      case object TweetFieldsMissingOrInvalid
          extends Error("expected tweet fields are missing or contain invalid values")
      case object TweetShouldBeHardDeleted
          extends Error("stored tweet that should be hard deleted is still present")
    }
    sealed trait Response
    object Response {
      sealed trait StoredTweetMetadata {
        def state: Option[TweetStateRecord]
        def allStates: Seq[TweetStateRecord]
        def scrubbedFields: Set[FieldId]
      }
      sealed trait StoredTweetErrors {
        def errs: Seq[Error]
      }
      /**
       * Tweet data was found, possibly state records and/or scrubbed field records.
       */
      sealed trait FoundAny extends Response with StoredTweetMetadata {
        def tweet: Tweet
      }
      object FoundAny {
        def unapply(
          response: Response
        ): Option[
          (Tweet, Option[TweetStateRecord], Seq[TweetStateRecord], Set[FieldId], Seq[Error])
        ] =
          response match {
            case f: FoundWithErrors =>
              Some((f.tweet, f.state, f.allStates, f.scrubbedFields, f.errs))
            case f: FoundAny => Some((f.tweet, f.state, f.allStates, f.scrubbedFields, Seq.empty))
            case _ => None
          }
      }
      /**
       * No records for this tweet id were found in storage
       */
      case class NotFound(id: TweetId) extends Response
      /**
       * Data related to the Tweet id was found but could not be loaded successfully. The
       * errs array contains details of the problems.
       */
      case class Failed(
        id: TweetId,
        state: Option[TweetStateRecord],
        allStates: Seq[TweetStateRecord],
        scrubbedFields: Set[FieldId],
        errs: Seq[Error],
      ) extends Response
          with StoredTweetMetadata
          with StoredTweetErrors
      /**
       * No Tweet data was found, and the most recent state record found is HardDeleted
       */
      case class HardDeleted(
        id: TweetId,
        state: Option[TweetStateRecord.HardDeleted],
        allStates: Seq[TweetStateRecord],
        scrubbedFields: Set[FieldId],
      ) extends Response
          with StoredTweetMetadata
      /**
       * Tweet data was found, and the most recent state record found, if any, is not
       * any form of deletion record.
       */
      case class Found(
        tweet: Tweet,
        state: Option[TweetStateRecord],
        allStates: Seq[TweetStateRecord],
        scrubbedFields: Set[FieldId],
      ) extends FoundAny
      /**
       * Tweet data was found, and the most recent state record found indicates deletion.
       */
      case class FoundDeleted(
        tweet: Tweet,
        state: Option[TweetStateRecord],
        allStates: Seq[TweetStateRecord],
        scrubbedFields: Set[FieldId],
      ) extends FoundAny
      /**
       * Tweet data was found, however errors were detected in the stored data. Required
       * fields may be missing from the Tweet struct (e.g. CoreData), stored fields that
       * should be scrubbed remain present, or Tweets that should be hard-deleted remain
       * in storage. The errs array contains details of the problems.
       */
      case class FoundWithErrors(
        tweet: Tweet,
        state: Option[TweetStateRecord],
        allStates: Seq[TweetStateRecord],
        scrubbedFields: Set[FieldId],
        errs: Seq[Error],
      ) extends FoundAny
          with StoredTweetErrors
    }
  }
  type HardDeleteTweet = TweetId => Stitch[HardDeleteTweet.Response]
  type SoftDelete = TweetId => Stitch[Unit]
  type BounceDelete = TweetId => Stitch[Unit]
  object HardDeleteTweet {
    sealed trait Response
    object Response {
      case class Deleted(deletedAtMillis: Option[Long], createdAtMillis: Option[Long])
          extends Response
      case class NotDeleted(id: TweetId, ineligibleLKey: Option[TweetKey.LKey])
          extends Throwable
          with Response
    }
  }
  type Undelete = TweetId => Stitch[Undelete.Response]
  object Undelete {
    case class Response(
      code: UndeleteResponseCode,
      tweet: Option[Tweet] = None,
      createdAtMillis: Option[Long] = None,
      archivedAtMillis: Option[Long] = None)
    sealed trait UndeleteResponseCode
    object UndeleteResponseCode {
      object Success extends UndeleteResponseCode
      object BackupNotFound extends UndeleteResponseCode
      object NotCreated extends UndeleteResponseCode
    }
  }
  type AddTweet = Tweet => Stitch[Unit]
  type UpdateTweet = (Tweet, Seq[Field]) => Stitch[TweetResponse]
  type GetDeletedTweets = Seq[TweetId] => Stitch[Seq[DeletedTweetResponse]]
  type DeleteAdditionalFields = (Seq[TweetId], Seq[Field]) => Stitch[Seq[TweetResponse]]
  type Scrub = (Seq[TweetId], Seq[Field]) => Stitch[Unit]
  type Ping = () => Future[Unit]
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala
@ -0,0 +1,34 @@
 package com.twitter.tweetypie.storage
 import scala.util.control.NoStackTrace
 sealed abstract class TweetStorageException(message: String, cause: Throwable)
    extends Exception(message, cause)
 /**
 * The request was not properly formed and failed an assertion present in the code. Should not be
 * retried without modification.
 */
 case class ClientError(message: String, cause: Throwable)
    extends TweetStorageException(message, cause)
    with NoStackTrace
 /**
 * Request was rejected by Manhattan or the in-process rate limiter. Should not be retried.
 */
 case class RateLimited(message: String, cause: Throwable)
    extends TweetStorageException(message, cause)
    with NoStackTrace
 /**
 * Corrupt tweets were requested from Manhattan
 */
 case class VersionMismatchError(message: String, cause: Throwable = null)
    extends TweetStorageException(message, cause)
    with NoStackTrace
 /**
 * All other unhandled exceptions.
 */
 case class InternalError(message: String, cause: Throwable = null)
    extends TweetStorageException(message, cause)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala
@ -0,0 +1,265 @@
 package com.twitter.tweetypie.storage
 import com.twitter.logging.Logger
 import com.twitter.scrooge.TFieldBlob
 import com.twitter.snowflake.id.SnowflakeId
 import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
 import com.twitter.storage.client.manhattan.kv.ManhattanException
 import com.twitter.tweetypie.storage.Response._
 import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
 import com.twitter.util.Return
 import com.twitter.util.Throw
 import com.twitter.util.Try
 object TweetUtils {
  val log: Logger = Logger("com.twitter.tweetypie.storage.TweetStorageLibrary")
  import FieldResponseCodec.ValueNotFoundException
  /**
   * It's rare, but we have seen tweets with userId=0, which is likely the result of a
   * failed/partial delete. Treat these as invalid tweets, which are returned to callers
   * as not found.
   */
  def isValid(tweet: StoredTweet): Boolean =
    tweet.userId.exists(_ != 0) && tweet.text.nonEmpty &&
      tweet.createdVia.nonEmpty && tweet.createdAtSec.nonEmpty
  /**
   * Helper function to extract Scrubbed field Ids from the result returned by reading entire tweet prefix
   * function.
   *
   * @param records The sequence of MH records for the given tweetId
   *
   * @return The set of scrubbed field ids
   */
  private[tweetypie] def extractScrubbedFields(records: Seq[TweetManhattanRecord]): Set[Short] =
    records
      .map(r => r.lkey)
      .collect { case TweetKey.LKey.ScrubbedFieldKey(fieldId) => fieldId }
      .toSet
  private[tweetypie] val expectedFields =
    TweetFields.requiredFieldIds.toSet - TweetFields.tweetIdField
  /**
   * Find the timestamp from a tweetId and a list of MH records. This is used when
   * you need a timestamp and you aren't sure that tweetId is a snowflake id.
   *
   * @param tweetId A tweetId you want the timestamp for.
   * @param records Tbird_mh records keyed on tweetId, one of which should be the
   * core fields record.
   * @return A milliseconds timestamp if one could be found.
   */
  private[tweetypie] def creationTimeFromTweetIdOrMHRecords(
    tweetId: Long,
    records: Seq[TweetManhattanRecord]
  ): Option[Long] =
    SnowflakeId
      .unixTimeMillisOptFromId(tweetId).orElse({
        records
          .find(_.lkey == TweetKey.LKey.CoreFieldsKey)
          .flatMap { coreFields =>
            CoreFieldsCodec
              .fromTFieldBlob(
                TFieldBlobCodec.fromByteBuffer(coreFields.value.contents)
              ).createdAtSec.map(seconds => seconds * 1000)
          }
      })
  /**
   * Helper function used to parse manhattan results for fields in a tweet (given in the form of
   * Sequence of (FieldKey, Try[Unit]) pairs) and build a TweetResponse object.
   *
   * @param callerName The name of the caller function. Used for error messages
   * @param tweetId Id of the Tweet for which TweetResponse is being built
   * @param fieldResults Sequence of (FieldKey, Try[Unit]).
   *
   * @return TweetResponse object
   */
  private[tweetypie] def buildTweetResponse(
    callerName: String,
    tweetId: Long,
    fieldResults: Map[FieldId, Try[Unit]]
  ): TweetResponse = {
    // Count Found/Not Found
    val successCount =
      fieldResults.foldLeft(0) {
        case (count, (_, Return(_))) => count + 1
        case (count, (_, Throw(_: ValueNotFoundException))) => count + 1
        case (count, _) => count
      }
    val fieldResponsesMap = getFieldResponses(callerName, tweetId, fieldResults)
    val overallCode = if (successCount > 0 && successCount == fieldResults.size) {
      TweetResponseCode.Success
    } else {
      // If any field was rate limited, then we consider the entire tweet to be rate limited. So first we scan
      // the field results to check such an occurrence.
      val wasRateLimited = fieldResults.exists { fieldResult =>
        fieldResult._2 match {
          case Throw(e: DeniedManhattanException) => true
          case _ => false
        }
      }
      // Were we rate limited for any of the additional fields?
      if (wasRateLimited) {
        TweetResponseCode.OverCapacity
      } else if (successCount == 0) {
        // successCount is < fieldResults.size at this point. So if allOrNone is true or
        // if successCount == 0 (i.e failed on all Fields), the overall code should be 'Failure'
        TweetResponseCode.Failure
      } else {
        // allOrNone == false AND successCount > 0 at this point. Clearly the overallCode should be Partial
        TweetResponseCode.Partial
      }
    }
    TweetResponse(tweetId, overallCode, Some(fieldResponsesMap))
  }
  /**
   * Helper function to convert manhattan results into a Map[FieldId, FieldResponse]
   *
   * @param fieldResults Sequence of (TweetKey, TFieldBlob).
   */
  private[tweetypie] def getFieldResponses(
    callerName: String,
    tweetId: TweetId,
    fieldResults: Map[FieldId, Try[_]]
  ): Map[FieldId, FieldResponse] =
    fieldResults.map {
      case (fieldId, resp) =>
        def keyStr = TweetKey.fieldKey(tweetId, fieldId).toString
        resp match {
          case Return(_) =>
            fieldId -> FieldResponse(FieldResponseCode.Success, None)
          case Throw(mhException: ManhattanException) =>
            val errMsg = s"Exception in $callerName. Key: $keyStr. Error: $mhException"
            mhException match {
              case _: ValueNotFoundException => // ValueNotFound is not an error
              case _ => log.error(errMsg)
            }
            fieldId -> FieldResponseCodec.fromThrowable(mhException, Some(errMsg))
          case Throw(e) =>
            val errMsg = s"Exception in $callerName. Key: $keyStr. Error: $e"
            log.error(errMsg)
            fieldId -> FieldResponse(FieldResponseCode.Error, Some(errMsg))
        }
    }
  /**
   * Helper function to build a TweetResponse object when being rate limited. Its possible that only some of the fields
   * got rate limited, so we indicate which fields got processed successfully, and which encountered some sort of error.
   *
   * @param tweetId Tweet id
   * @param callerName name of API calling this function
   * @param fieldResponses field responses for the case where
   *
   * @return The TweetResponse object
   */
  private[tweetypie] def buildTweetOverCapacityResponse(
    callerName: String,
    tweetId: Long,
    fieldResponses: Map[FieldId, Try[Unit]]
  ) = {
    val fieldResponsesMap = getFieldResponses(callerName, tweetId, fieldResponses)
    TweetResponse(tweetId, TweetResponseCode.OverCapacity, Some(fieldResponsesMap))
  }
  /**
   * Build a StoredTweet from a Seq of records. Core fields are handled specially.
   */
  private[tweetypie] def buildStoredTweet(
    tweetId: TweetId,
    records: Seq[TweetManhattanRecord],
    includeScrubbed: Boolean = false,
  ): StoredTweet = {
    getStoredTweetBlobs(records, includeScrubbed)
      .flatMap { fieldBlob =>
        // When fieldId == TweetFields.rootCoreFieldId, we have further work to do since the
        // 'value' is really serialized/packed version of all core fields. In this case we'll have
        // to unpack it into many TFieldBlobs.
        if (fieldBlob.id == TweetFields.rootCoreFieldId) {
          // We won't throw any error in this function and instead let the caller function handle this
          // condition (i.e If the caller function does not find any values for the core-fields in
          // the returned map, it should assume that the tweet is not found)
          CoreFieldsCodec.unpackFields(fieldBlob).values.toSeq
        } else {
          Seq(fieldBlob)
        }
      }.foldLeft(StoredTweet(tweetId))(_.setField(_))
  }
  private[tweetypie] def buildValidStoredTweet(
    tweetId: TweetId,
    records: Seq[TweetManhattanRecord]
  ): Option[StoredTweet] = {
    val storedTweet = buildStoredTweet(tweetId, records)
    if (storedTweet.getFieldBlobs(expectedFields).nonEmpty && isValid(storedTweet)) {
      Some(storedTweet)
    } else {
      None
    }
  }
  /**
   * Return a TFieldBlob for each StoredTweet field defined in this set of records.
   * @param includeScrubbed when false, result will not include scrubbed fields even
   *                        if the data is present in the set of records.
   */
  private[tweetypie] def getStoredTweetBlobs(
    records: Seq[TweetManhattanRecord],
    includeScrubbed: Boolean = false,
  ): Seq[TFieldBlob] = {
    val scrubbed = extractScrubbedFields(records)
    records
      .flatMap { r =>
        // extract LKey.FieldKey records if they are not scrubbed and get their TFieldBlobs
        r.key match {
          case fullKey @ TweetKey(_, key: TweetKey.LKey.FieldKey)
              if includeScrubbed || !scrubbed.contains(key.fieldId) =>
            try {
              val fieldBlob = TFieldBlobCodec.fromByteBuffer(r.value.contents)
              if (fieldBlob.field.id != key.fieldId) {
                throw new AssertionError(
                  s"Blob stored for $fullKey has unexpected id ${fieldBlob.field.id}"
                )
              }
              Some(fieldBlob)
            } catch {
              case e: VersionMismatchError =>
                log.error(
                  s"Failed to decode bytebuffer for $fullKey: ${e.getMessage}"
                )
                throw e
            }
          case _ => None
        }
      }
  }
  /**
   * Its important to bubble up rate limiting exceptions as they would likely be the root cause for other issues
   * (timeouts etc.), so we scan for this particular exception, and if found, we bubble that up specifically
   *
   * @param seqOfTries The sequence of tries which may contain within it a rate limit exception
   *
   * @return if a rate limiting exn was detected, this will be a Throw(e: DeniedManhattanException)
   *         otherwise it will be a Return(_) only if all individual tries succeeded
   */
  private[tweetypie] def collectWithRateLimitCheck(seqOfTries: Seq[Try[Unit]]): Try[Unit] = {
    val rateLimitThrowOpt = seqOfTries.find {
      case Throw(e: DeniedManhattanException) => true
      case _ => false
    }
    rateLimitThrowOpt.getOrElse(
      Try.collect(seqOfTries).map(_ => ())
    ) // Operation is considered successful only if all the deletions are successful
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala
@ -0,0 +1,106 @@
 package com.twitter.tweetypie.storage
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.stitch.Stitch
 import com.twitter.tweetypie.storage.TweetStorageClient.Undelete
 import com.twitter.tweetypie.storage.TweetUtils._
 import com.twitter.util.Time
 object UndeleteHandler {
  def apply(
    read: ManhattanOperations.Read,
    localInsert: ManhattanOperations.Insert,
    remoteInsert: ManhattanOperations.Insert,
    delete: ManhattanOperations.Delete,
    undeleteWindowHours: Int,
    stats: StatsReceiver
  ): Undelete = {
    def withinUndeleteWindow(timestampMs: Long) =
      (Time.now - Time.fromMilliseconds(timestampMs)).inHours < undeleteWindowHours
    def prepareUndelete(
      tweetId: TweetId,
      records: Seq[TweetManhattanRecord]
    ): (Undelete.Response, Option[TweetManhattanRecord]) = {
      val undeleteRecord =
        Some(TweetStateRecord.Undeleted(tweetId, Time.now.inMillis).toTweetMhRecord)
      TweetStateRecord.mostRecent(records) match {
        // check if we need to undo a soft deletion
        case Some(TweetStateRecord.SoftDeleted(_, createdAt)) =>
          if (createdAt > 0) {
            if (withinUndeleteWindow(createdAt)) {
              (
                mkSuccessfulUndeleteResponse(tweetId, records, Some(createdAt)),
                undeleteRecord
              )
            } else {
              (Undelete.Response(Undelete.UndeleteResponseCode.BackupNotFound), None)
            }
          } else {
            throw InternalError(s"Timestamp unavailable for $tweetId")
          }
        // BounceDeleted tweets may not be undeleted. see go/bouncedtweet
        case Some(_: TweetStateRecord.HardDeleted | _: TweetStateRecord.BounceDeleted) =>
          (Undelete.Response(Undelete.UndeleteResponseCode.BackupNotFound), None)
        case Some(_: TweetStateRecord.Undeleted) =>
          // We still want to write the undelete record, because at this point, we only know that the local DC's
          // winning record is not a soft/hard deletion record, while its possible that the remote DC's winning
          // record might still be a soft deletion record. Having said that, we don't want to set it to true
          // if the winning record is forceAdd, as the forceAdd call should have ensured that both DCs had the
          // forceAdd record.
          (mkSuccessfulUndeleteResponse(tweetId, records), undeleteRecord)
        case Some(_: TweetStateRecord.ForceAdded) =>
          (mkSuccessfulUndeleteResponse(tweetId, records), None)
        // lets write the undeletion record just in case there is a softdeletion record in flight
        case None => (mkSuccessfulUndeleteResponse(tweetId, records), undeleteRecord)
      }
    }
    // Write the undelete record both locally and remotely to protect
    // against races with hard delete replication. We only need this
    // protection for the insertion of the undelete record.
    def multiInsert(record: TweetManhattanRecord): Stitch[Unit] =
      Stitch
        .collect(
          Seq(
            localInsert(record).liftToTry,
            remoteInsert(record).liftToTry
          )
        )
        .map(collectWithRateLimitCheck)
        .lowerFromTry
    def deleteSoftDeleteRecord(tweetId: TweetId): Stitch[Unit] = {
      val mhKey = TweetKey.softDeletionStateKey(tweetId)
      delete(mhKey, None)
    }
    tweetId =>
      for {
        records <- read(tweetId)
        (response, undeleteRecord) = prepareUndelete(tweetId, records)
        _ <- Stitch.collect(undeleteRecord.map(multiInsert)).unit
        _ <- deleteSoftDeleteRecord(tweetId)
      } yield {
        response
      }
  }
  private[storage] def mkSuccessfulUndeleteResponse(
    tweetId: TweetId,
    records: Seq[TweetManhattanRecord],
    timestampOpt: Option[Long] = None
  ) =
    Undelete.Response(
      Undelete.UndeleteResponseCode.Success,
      Some(
        StorageConversions.fromStoredTweet(buildStoredTweet(tweetId, records))
      ),
      archivedAtMillis = timestampOpt
    )
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala
@ -0,0 +1,64 @@
 package com.twitter.tweetypie.storage
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.stitch.Stitch
 import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
 import com.twitter.storage.client.manhattan.kv.ManhattanValue
 import com.twitter.tweetypie.storage.TweetUtils._
 import com.twitter.tweetypie.thriftscala.Tweet
 import com.twitter.util.Throw
 import com.twitter.util.Time
 object UpdateTweetHandler {
  def apply(
    insert: ManhattanOperations.Insert,
    stats: StatsReceiver
  ): TweetStorageClient.UpdateTweet = { (tpTweet: Tweet, fields: Seq[Field]) =>
    require(
      fields.forall(!TweetFields.coreFieldIds.contains(_)),
      "Core fields cannot be modified by calling updateTweet; use addTweet instead."
    )
    require(
      areAllFieldsDefined(tpTweet, fields),
      s"Input tweet $tpTweet does not have specified fields $fields set"
    )
    val now = Time.now
    val storedTweet = StorageConversions.toStoredTweetForFields(tpTweet, fields.toSet)
    val tweetId = storedTweet.id
    Stats.updatePerFieldQpsCounters("updateTweet", fields.map(_.id), 1, stats)
    val (fieldIds, stitchesPerTweet) =
      fields.map { field =>
        val fieldId = field.id
        val tweetKey = TweetKey.fieldKey(tweetId, fieldId)
        val blob = storedTweet.getFieldBlob(fieldId).get
        val value = ManhattanValue(TFieldBlobCodec.toByteBuffer(blob), Some(now))
        val record = TweetManhattanRecord(tweetKey, value)
        (fieldId, insert(record).liftToTry)
      }.unzip
    Stitch.collect(stitchesPerTweet).map { seqOfTries =>
      val fieldkeyAndMhResults = fieldIds.zip(seqOfTries).toMap
      // If even a single field was rate limited, we will send an overall OverCapacity TweetResponse
      val wasRateLimited = fieldkeyAndMhResults.exists { keyAndResult =>
        keyAndResult._2 match {
          case Throw(e: DeniedManhattanException) => true
          case _ => false
        }
      }
      if (wasRateLimited) {
        buildTweetOverCapacityResponse("updateTweets", tweetId, fieldkeyAndMhResults)
      } else {
        buildTweetResponse("updateTweets", tweetId, fieldkeyAndMhResults)
      }
    }
  }
  private def areAllFieldsDefined(tpTweet: Tweet, fields: Seq[Field]) = {
    val storedTweet = StorageConversions.toStoredTweetForFields(tpTweet, fields.toSet)
    fields.map(_.id).forall(storedTweet.getFieldBlob(_).isDefined)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala
@ -0,0 +1,11 @@
 package com.twitter.tweetypie
 import com.twitter.storage.client.manhattan.kv.ManhattanValue
 import java.nio.ByteBuffer
 package object storage {
  type TweetId = Long
  type FieldId = Short
  type TweetManhattanValue = ManhattanValue[ByteBuffer]
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD
@ -0,0 +1,20 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "finagle/finagle-core/src/main",
        "flock-client/src/main/scala",
        "flock-client/src/main/thrift:thrift-scala",
        "tweetypie/servo/util/src/main/scala",
        "snowflake:id",
        "src/thrift/com/twitter/gizmoduck:thrift-scala",
        "src/thrift/com/twitter/servo:servo-exception-java",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
        "tweetypie/server/src/main/scala/com/twitter/tweetypie",
        "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil",
        "tweetypie/common/src/scala/com/twitter/tweetypie/util",
        "util/util-core:scala",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala
@ -0,0 +1,532 @@
 /** Copyright 2010 Twitter, Inc. */
 package com.twitter.tweetypie
 package tflock
 import com.twitter.finagle.stats.Counter
 import com.twitter.flockdb.client._
 import com.twitter.flockdb.client.thriftscala.Priority
 import com.twitter.snowflake.id.SnowflakeId
 import com.twitter.tweetypie.serverutil.StoredCard
 import com.twitter.tweetypie.thriftscala._
 import com.twitter.util.Future
 import scala.collection.mutable.ListBuffer
 object TFlockIndexer {
  /**
   * Printable names for some edge types currently defined in [[com.twitter.flockdb.client]].
   * Used to defined stats counters for adding edges.
   */
  val graphNames: Map[Int, String] =
    Map(
      CardTweetsGraph.id -> "card_tweets",
      ConversationGraph.id -> "conversation",
      DirectedAtUserIdGraph.id -> "directed_at_user_id",
      InvitedUsersGraph.id -> "invited_users",
      MediaTimelineGraph.id -> "media_timeline",
      MentionsGraph.id -> "mentions",
      NarrowcastSentTweetsGraph.id -> "narrowcast_sent_tweets",
      NullcastedTweetsGraph.id -> "nullcasted_tweets",
      QuotersGraph.id -> "quoters",
      QuotesGraph.id -> "quotes",
      QuoteTweetsIndexGraph.id -> "quote_tweets_index",
      RepliesToTweetsGraph.id -> "replies_to_tweets",
      RetweetsByMeGraph.id -> "retweets_by_me",
      RetweetsGraph.id -> "retweets",
      RetweetsOfMeGraph.id -> "retweets_of_me",
      RetweetSourceGraph.id -> "retweet_source",
      TweetsRetweetedGraph.id -> "tweets_retweeted",
      UserTimelineGraph.id -> "user_timeline",
      CreatorSubscriptionTimelineGraph.id -> "creator_subscription_timeline",
      CreatorSubscriptionMediaTimelineGraph.id -> "creator_subscription_image_timeline",
    )
  /**
   * On edge deletion, edges are either archived permanently or retained for 3 months, based on
   * the retention policy in the above confluence page.
   *
   * These two retention policies correspond to the two deletion techniques: archive and remove.
   * We call removeEdges for edges with a short retention policy and archiveEdges for edges with
   * a permanent retention policy.
   */
  val graphsWithRemovedEdges: Seq[Int] =
    Seq(
      CardTweetsGraph.id,
      CuratedTimelineGraph.id,
      CuratedTweetsGraph.id,
      DirectedAtUserIdGraph.id,
      MediaTimelineGraph.id,
      MutedConversationsGraph.id,
      QuotersGraph.id,
      QuotesGraph.id,
      QuoteTweetsIndexGraph.id,
      ReportedTweetsGraph.id,
      RetweetsOfMeGraph.id,
      RetweetSourceGraph.id,
      SoftLikesGraph.id,
      TweetsRetweetedGraph.id,
      CreatorSubscriptionTimelineGraph.id,
      CreatorSubscriptionMediaTimelineGraph.id,
    )
  /**
   * These edges should be left in place when bounced tweets are deleted.
   * These edges are removed during hard deletion.
   *
   * This is done so external teams (timelines) can execute on these edges for
   * tombstone feature.
   */
  val bounceDeleteGraphIds: Set[Int] =
    Set(
      UserTimelineGraph.id,
      ConversationGraph.id
    )
  def makeCounters(stats: StatsReceiver, operation: String): Map[Int, Counter] = {
    TFlockIndexer.graphNames
      .mapValues(stats.scope(_).counter(operation))
      .withDefaultValue(stats.scope("unknown").counter(operation))
  }
 }
 /**
 * @param backgroundIndexingPriority specifies the queue to use for
 *   background indexing operations. This is useful for making the
 *   effects of background indexing operations (such as deleting edges
 *   for deleted Tweets) available sooner in testing scenarios
 *   (end-to-end tests or development instances). It is set to
 *   Priority.Low in production to reduce the load on high priority
 *   queues that we use for prominently user-visible operations.
 */
 class TFlockIndexer(
  tflock: TFlockClient,
  hasMedia: Tweet => Boolean,
  backgroundIndexingPriority: Priority,
  stats: StatsReceiver)
    extends TweetIndexer {
  private[this] val FutureNil = Future.Nil
  private[this] val archiveCounters = TFlockIndexer.makeCounters(stats, "archive")
  private[this] val removeCounters = TFlockIndexer.makeCounters(stats, "remove")
  private[this] val insertCounters = TFlockIndexer.makeCounters(stats, "insert")
  private[this] val negateCounters = TFlockIndexer.makeCounters(stats, "negate")
  private[this] val foregroundIndexingPriority: Priority = Priority.High
  override def createIndex(tweet: Tweet): Future[Unit] =
    createEdges(tweet, isUndelete = false)
  override def undeleteIndex(tweet: Tweet): Future[Unit] =
    createEdges(tweet, isUndelete = true)
  private[this] case class PartitionedEdges(
    longRetention: Seq[ExecuteEdge[StatusGraph]] = Nil,
    shortRetention: Seq[ExecuteEdge[StatusGraph]] = Nil,
    negate: Seq[ExecuteEdge[StatusGraph]] = Nil,
    ignore: Seq[ExecuteEdge[StatusGraph]] = Nil)
  private[this] def partitionEdgesForDelete(
    edges: Seq[ExecuteEdge[StatusGraph]],
    isBounceDelete: Boolean
  ) =
    edges.foldLeft(PartitionedEdges()) {
      // Two dependees of UserTimelineGraph edge states to satisfy: timelines & safety tools.
      // Timelines show bounce-deleted tweets as tombstones; regular deletes are not shown.
      //   - i.e. timelineIds = UserTimelineGraph(Normal || Negative)
      // Safety tools show deleted tweets to authorized internal review agents
      //   - i.e. deletedIds = UserTimelineGraph(Removed || Negative)
      case (partitionedEdges, edge) if isBounceDelete && edge.graphId == UserTimelineGraph.id =>
        partitionedEdges.copy(negate = edge +: partitionedEdges.negate)
      case (partitionedEdges, edge) if isBounceDelete && edge.graphId == ConversationGraph.id =>
        // Bounce-deleted tweets remain rendered as tombstones in conversations, so do not modify
        // the ConversationGraph edge state
        partitionedEdges.copy(ignore = edge +: partitionedEdges.ignore)
      case (partitionedEdges, edge)
          if TFlockIndexer.graphsWithRemovedEdges.contains(edge.graphId) =>
        partitionedEdges.copy(shortRetention = edge +: partitionedEdges.shortRetention)
      case (partitionedEdges, edge) =>
        partitionedEdges.copy(longRetention = edge +: partitionedEdges.longRetention)
    }
  override def deleteIndex(tweet: Tweet, isBounceDelete: Boolean): Future[Unit] =
    for {
      edges <- getEdges(tweet, isCreate = false, isDelete = true, isUndelete = false)
      partitionedEdges = partitionEdgesForDelete(edges, isBounceDelete)
      () <-
        Future
          .join(
            tflock
              .archiveEdges(partitionedEdges.longRetention, backgroundIndexingPriority)
              .onSuccess(_ =>
                partitionedEdges.longRetention.foreach(e => archiveCounters(e.graphId).incr())),
            tflock
              .removeEdges(partitionedEdges.shortRetention, backgroundIndexingPriority)
              .onSuccess(_ =>
                partitionedEdges.shortRetention.foreach(e => removeCounters(e.graphId).incr())),
            tflock
              .negateEdges(partitionedEdges.negate, backgroundIndexingPriority)
              .onSuccess(_ =>
                partitionedEdges.negate.foreach(e => negateCounters(e.graphId).incr()))
          )
          .unit
    } yield ()
  /**
   * This operation is called when a user is put into or taken out of
   * a state in which their retweets should no longer be visible
   * (e.g. suspended or ROPO).
   */
  override def setRetweetVisibility(retweetId: TweetId, setVisible: Boolean): Future[Unit] = {
    val retweetEdge = Seq(ExecuteEdge(retweetId, RetweetsGraph, None, Reverse))
    if (setVisible) {
      tflock
        .insertEdges(retweetEdge, backgroundIndexingPriority)
        .onSuccess(_ => insertCounters(RetweetsGraph.id).incr())
    } else {
      tflock
        .archiveEdges(retweetEdge, backgroundIndexingPriority)
        .onSuccess(_ => archiveCounters(RetweetsGraph.id).incr())
    }
  }
  private[this] def createEdges(tweet: Tweet, isUndelete: Boolean): Future[Unit] =
    for {
      edges <- getEdges(tweet = tweet, isCreate = true, isDelete = false, isUndelete = isUndelete)
      () <- tflock.insertEdges(edges, foregroundIndexingPriority)
    } yield {
      // Count all the edges we've successfully added:
      edges.foreach(e => insertCounters(e.graphId).incr())
    }
  private[this] def addRTEdges(
    tweet: Tweet,
    share: Share,
    isCreate: Boolean,
    edges: ListBuffer[ExecuteEdge[StatusGraph]],
    futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]]
  ): Unit = {
    edges += RetweetsOfMeGraph.edge(share.sourceUserId, tweet.id)
    edges += RetweetsByMeGraph.edge(getUserId(tweet), tweet.id)
    edges += RetweetsGraph.edge(share.sourceStatusId, tweet.id)
    if (isCreate) {
      edges += ExecuteEdge(
        sourceId = getUserId(tweet),
        graph = RetweetSourceGraph,
        destinationIds = Some(Seq(share.sourceStatusId)),
        direction = Forward,
        position = Some(SnowflakeId(tweet.id).time.inMillis)
      )
      edges.append(TweetsRetweetedGraph.edge(share.sourceUserId, share.sourceStatusId))
    } else {
      edges += RetweetSourceGraph.edge(getUserId(tweet), share.sourceStatusId)
      // if this is the last retweet we need to remove it from the source user's
      // tweets retweeted graph
      futureEdges.append(
        tflock.count(RetweetsGraph.from(share.sourceStatusId)).flatMap { count =>
          if (count <= 1) {
            tflock.selectAll(RetweetsGraph.from(share.sourceStatusId)).map { tweets =>
              if (tweets.size <= 1)
                Seq(TweetsRetweetedGraph.edge(share.sourceUserId, share.sourceStatusId))
              else
                Nil
            }
          } else {
            FutureNil
          }
        }
      )
    }
  }
  private[this] def addReplyEdges(
    tweet: Tweet,
    edges: ListBuffer[ExecuteEdge[StatusGraph]]
  ): Unit = {
    getReply(tweet).foreach { reply =>
      reply.inReplyToStatusId.flatMap { inReplyToStatusId =>
        edges += RepliesToTweetsGraph.edge(inReplyToStatusId, tweet.id)
        // only index conversationId if this is a reply to another tweet
        TweetLenses.conversationId.get(tweet).map { conversationId =>
          edges += ConversationGraph.edge(conversationId, tweet.id)
        }
      }
    }
  }
  private[this] def addDirectedAtEdges(
    tweet: Tweet,
    edges: ListBuffer[ExecuteEdge[StatusGraph]]
  ): Unit = {
    TweetLenses.directedAtUser.get(tweet).foreach { directedAtUser =>
      edges += DirectedAtUserIdGraph.edge(directedAtUser.userId, tweet.id)
    }
  }
  private[this] def addMentionEdges(
    tweet: Tweet,
    edges: ListBuffer[ExecuteEdge[StatusGraph]]
  ): Unit = {
    getMentions(tweet)
      .flatMap(_.userId).foreach { mention =>
        edges += MentionsGraph.edge(mention, tweet.id)
      }
  }
  private[this] def addQTEdges(
    tweet: Tweet,
    edges: ListBuffer[ExecuteEdge[StatusGraph]],
    futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]],
    isCreate: Boolean
  ): Unit = {
    val userId = getUserId(tweet)
    tweet.quotedTweet.foreach { quotedTweet =>
      // Regardless of tweet creates/deletes, we add the corresponding edges to the
      // following two graphs. Note that we're handling the case for
      // the QuotersGraph slightly differently in the tweet delete case.
      edges.append(QuotesGraph.edge(quotedTweet.userId, tweet.id))
      edges.append(QuoteTweetsIndexGraph.edge(quotedTweet.tweetId, tweet.id))
      if (isCreate) {
        // As mentioned above, for tweet creates we go ahead and add an edge
        // to the QuotersGraph without any additional checks.
        edges.append(QuotersGraph.edge(quotedTweet.tweetId, userId))
      } else {
        // For tweet deletes, we only add an edge to be deleted from the
        // QuotersGraph if the tweeting user isn't quoting the tweet anymore
        // i.e. if a user has quoted a tweet multiple times, we only delete
        // an edge from the QuotersGraph if they've deleted all the quotes,
        // otherwise an edge should exist by definition of what the QuotersGraph
        // represents.
        // Note: There can be a potential edge case here due to a race condition
        // in the following scenario.
        // i)   A quotes a tweet T twice resulting in tweets T1 and T2.
        // ii)  There should exist edges in the QuotersGraph from T -> A and T1 <-> T, T2 <-> T in
        //      the QuoteTweetsIndexGraph, but one of the edges haven't been written
        //      to the QuoteTweetsIndex graph in TFlock yet.
        // iii) In this scenario, we shouldn't really be deleting an edge as we're doing below.
        // The approach that we're taking below is a "best effort" approach similar to what we
        // currently do for RTs.
        // Find all the quotes of the quoted tweet from the quoting user
        val quotesFromQuotingUser = QuoteTweetsIndexGraph
          .from(quotedTweet.tweetId)
          .intersect(UserTimelineGraph.from(userId))
        futureEdges.append(
          tflock
            .count(quotesFromQuotingUser).flatMap { count =>
              // If this is the last quote of the quoted tweet from the quoting user,
              // we go ahead and delete the edge from the QuotersGraph.
              if (count <= 1) {
                tflock.selectAll(quotesFromQuotingUser).map { tweets =>
                  if (tweets.size <= 1) {
                    Seq(QuotersGraph.edge(quotedTweet.tweetId, userId))
                  } else {
                    Nil
                  }
                }
              } else {
                FutureNil
              }
            }
        )
      }
    }
  }
  private[this] def addCardEdges(
    tweet: Tweet,
    edges: ListBuffer[ExecuteEdge[StatusGraph]]
  ): Unit = {
    // Note that we are indexing only the TOO "stored" cards
    // (cardUri=card://<cardId>). Rest of the cards are ignored here.
    tweet.cardReference
      .collect {
        case StoredCard(id) =>
          edges.append(CardTweetsGraph.edge(id, tweet.id))
      }.getOrElse(())
  }
  // Note: on undelete, this method restores all archived edges, including those that may have
  // been archived prior to the delete. This is incorrect behavior but in practice rarely
  // causes problems, as undeletes are so rare.
  private[this] def addEdgesForDeleteOrUndelete(
    tweet: Tweet,
    edges: ListBuffer[ExecuteEdge[StatusGraph]]
  ): Unit = {
    edges.appendAll(
      Seq(
        MentionsGraph.edges(tweet.id, None, Reverse),
        RepliesToTweetsGraph.edges(tweet.id, None)
      )
    )
    // When we delete or undelete a conversation control root Tweet we want to archive or restore
    // all the edges in InvitedUsersGraph from the Tweet id.
    if (hasConversationControl(tweet) && isConversationRoot(tweet)) {
      edges.append(InvitedUsersGraph.edges(tweet.id, None))
    }
  }
  private[this] def addSimpleEdges(
    tweet: Tweet,
    edges: ListBuffer[ExecuteEdge[StatusGraph]]
  ): Unit = {
    if (TweetLenses.nullcast.get(tweet)) {
      edges.append(NullcastedTweetsGraph.edge(getUserId(tweet), tweet.id))
    } else if (TweetLenses.narrowcast.get(tweet).isDefined) {
      edges.append(NarrowcastSentTweetsGraph.edge(getUserId(tweet), tweet.id))
    } else {
      edges.append(UserTimelineGraph.edge(getUserId(tweet), tweet.id))
      if (hasMedia(tweet))
        edges.append(MediaTimelineGraph.edge(getUserId(tweet), tweet.id))
      // Index root creator subscription tweets.
      // Ignore replies because those are not necessarily visible to a user who subscribes to tweet author
      val isRootTweet: Boolean = tweet.coreData match {
        case Some(c) => c.reply.isEmpty && c.share.isEmpty
        case None => true
      }
      if (tweet.exclusiveTweetControl.isDefined && isRootTweet) {
        edges.append(CreatorSubscriptionTimelineGraph.edge(getUserId(tweet), tweet.id))
        if (hasMedia(tweet))
          edges.append(CreatorSubscriptionMediaTimelineGraph.edge(getUserId(tweet), tweet.id))
      }
    }
  }
  /**
   * Issues edges for each mention of user in a conversation-controlled tweet. This way InvitedUsers
   * graph accumulates complete set of ids for @mention-invited users, by conversation id.
   */
  private def invitedUsersEdgesForCreate(
    tweet: Tweet,
    edges: ListBuffer[ExecuteEdge[StatusGraph]]
  ): Unit = {
    val conversationId: Long = getConversationId(tweet).getOrElse(tweet.id)
    val mentions: Seq[UserId] = getMentions(tweet).flatMap(_.userId)
    edges.appendAll(mentions.map(userId => InvitedUsersGraph.edge(conversationId, userId)))
  }
  /**
   * Issues edges of InviteUsersGraph that ought to be deleted for a conversation controlled reply.
   * These are mentions of users in the given tweet, only if the user was not mentioned elsewhere
   * in the conversation. This way for a conversation, InvitedUsersGraph would always hold a set
   * of all users invited to the conversation, and an edge is removed only after the last mention of
   * a user is deleted.
   */
  private def invitedUsersEdgesForDelete(
    tweet: Tweet,
    futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]]
  ): Unit = {
    getConversationId(tweet).foreach { conversationId: Long =>
      val mentions: Seq[UserId] = getMentions(tweet).flatMap(_.userId)
      mentions.foreach { userId =>
        val tweetIdsWithinConversation = ConversationGraph.from(conversationId)
        val tweetIdsThatMentionUser = MentionsGraph.from(userId)
        futureEdges.append(
          tflock
            .selectAll(
              query = tweetIdsThatMentionUser.intersect(tweetIdsWithinConversation),
              limit = Some(2), // Just need to know if it is >1 or <=1, so 2 are enough.
              pageSize = None // Provide default, otherwise Mockito complains
            ).map { tweetIds: Seq[Long] =>
              if (tweetIds.size <= 1) {
                Seq(InvitedUsersGraph.edge(conversationId, userId))
              } else {
                Nil
              }
            }
        )
      }
    }
  }
  private def hasInviteViaMention(tweet: Tweet): Boolean = {
    tweet.conversationControl match {
      case Some(ConversationControl.ByInvitation(controls)) =>
        controls.inviteViaMention.getOrElse(false)
      case Some(ConversationControl.Community(controls)) =>
        controls.inviteViaMention.getOrElse(false)
      case Some(ConversationControl.Followers(followers)) =>
        followers.inviteViaMention.getOrElse(false)
      case _ =>
        false
    }
  }
  private def hasConversationControl(tweet: Tweet): Boolean =
    tweet.conversationControl.isDefined
  // If a Tweet has a ConversationControl, it must have a ConversationId associated with it so we
  // can compare the ConversationId with the current Tweet ID to determine if it's the root of the
  // conversation. See ConversationIdHydrator for more details
  private def isConversationRoot(tweet: Tweet): Boolean =
    getConversationId(tweet).get == tweet.id
  private def addInvitedUsersEdges(
    tweet: Tweet,
    isCreate: Boolean,
    isUndelete: Boolean,
    edges: ListBuffer[ExecuteEdge[StatusGraph]],
    futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]]
  ): Unit = {
    if (hasConversationControl(tweet)) {
      if (isCreate) {
        if (isConversationRoot(tweet) && !isUndelete) {
          // For root Tweets, only add edges for original creates, not for undeletes.
          // Undeletes are handled by addEdgesForDeleteOrUndelete.
          invitedUsersEdgesForCreate(tweet, edges)
        }
        if (!isConversationRoot(tweet) && hasInviteViaMention(tweet)) {
          // For replies, only add edges when the conversation control is in inviteViaMention mode.
          invitedUsersEdgesForCreate(tweet, edges)
        }
      } else {
        if (!isConversationRoot(tweet)) {
          invitedUsersEdgesForDelete(tweet, futureEdges)
        }
      }
    }
  }
  private[this] def getEdges(
    tweet: Tweet,
    isCreate: Boolean,
    isDelete: Boolean,
    isUndelete: Boolean
  ): Future[Seq[ExecuteEdge[StatusGraph]]] = {
    val edges = ListBuffer[ExecuteEdge[StatusGraph]]()
    val futureEdges = ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]]()
    addSimpleEdges(tweet, edges)
    getShare(tweet) match {
      case Some(share) => addRTEdges(tweet, share, isCreate, edges, futureEdges)
      case _ =>
        addInvitedUsersEdges(tweet, isCreate, isUndelete, edges, futureEdges)
        addReplyEdges(tweet, edges)
        addDirectedAtEdges(tweet, edges)
        addMentionEdges(tweet, edges)
        addQTEdges(tweet, edges, futureEdges, isCreate)
        addCardEdges(tweet, edges)
        if (isDelete || isUndelete) {
          addEdgesForDeleteOrUndelete(tweet, edges)
        }
    }
    Future
      .collect(futureEdges)
      .map { moreEdges => (edges ++= moreEdges.flatten).toList }
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala
@ -0,0 +1,30 @@
 /** Copyright 2010 Twitter, Inc. */
 package com.twitter.tweetypie
 package tflock
 import com.twitter.tweetypie.thriftscala.Tweet
 import com.twitter.util.Future
 trait TweetIndexer {
  /**
   * Called at tweet-creation time, this method should set up all relevant indices on the tweet.
   */
  def createIndex(tweet: Tweet): Future[Unit] = Future.Unit
  /**
   * Called at tweet-undelete time (which isn't yet handled), this method should
   * restore all relevant indices on the tweet.
   */
  def undeleteIndex(tweet: Tweet): Future[Unit] = Future.Unit
  /**
   * Called at tweet-delete time, this method should archive all relevant indices on the tweet.
   */
  def deleteIndex(tweet: Tweet, isBounceDelete: Boolean): Future[Unit] = Future.Unit
  /**
   * This method should archive or unarchive the retweet edge in TFlock RetweetsGraph.
   */
  def setRetweetVisibility(retweetId: TweetId, visible: Boolean): Future[Unit] = Future.Unit
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD
@ -0,0 +1,13 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "finagle/finagle-core/src/main",
        "scrooge/scrooge-core/src/main/scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala",
        "util/util-core:scala",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala
@ -0,0 +1,8 @@
 package com.twitter.tweetypie.thriftscala
 import com.twitter.finagle.service.FailedService
 class NotImplementedTweetService
    extends TweetService$FinagleClient(
      new FailedService(new UnsupportedOperationException("not implemented"))
    )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala
@ -0,0 +1,79 @@
 package com.twitter.tweetypie.thriftscala
 import com.twitter.util.Future
 /**
 * A trait for TweetService implementations that wrap an underlying
 * TweetService and need to modify only some of the methods.
 */
 trait TweetServiceProxy extends TweetService.MethodPerEndpoint {
  protected def underlying: TweetService.MethodPerEndpoint
  /**
   * Default implementation simply passes through the Future but logic can be added to wrap each
   * invocation to the underlying TweetService
   */
  protected def wrap[A](f: => Future[A]): Future[A] =
    f
  override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] =
    wrap(underlying.getTweets(request))
  override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] =
    wrap(underlying.getTweetFields(request))
  override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] =
    wrap(underlying.getTweetCounts(request))
  override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] =
    wrap(underlying.setAdditionalFields(request))
  override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] =
    wrap(underlying.deleteAdditionalFields(request))
  override def postTweet(request: PostTweetRequest): Future[PostTweetResult] =
    wrap(underlying.postTweet(request))
  override def postRetweet(request: RetweetRequest): Future[PostTweetResult] =
    wrap(underlying.postRetweet(request))
  override def unretweet(request: UnretweetRequest): Future[UnretweetResult] =
    wrap(underlying.unretweet(request))
  override def getDeletedTweets(
    request: GetDeletedTweetsRequest
  ): Future[Seq[GetDeletedTweetResult]] =
    wrap(underlying.getDeletedTweets(request))
  override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] =
    wrap(underlying.deleteTweets(request))
  override def updatePossiblySensitiveTweet(
    request: UpdatePossiblySensitiveTweetRequest
  ): Future[Unit] =
    wrap(underlying.updatePossiblySensitiveTweet(request))
  override def undeleteTweet(request: UndeleteTweetRequest): Future[UndeleteTweetResponse] =
    wrap(underlying.undeleteTweet(request))
  override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] =
    wrap(underlying.eraseUserTweets(request))
  override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] =
    wrap(underlying.incrTweetFavCount(request))
  override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] =
    wrap(underlying.deleteLocationData(request))
  override def scrubGeo(request: GeoScrub): Future[Unit] =
    wrap(underlying.scrubGeo(request))
  override def takedown(request: TakedownRequest): Future[Unit] =
    wrap(underlying.takedown(request))
  override def flush(request: FlushRequest): Future[Unit] =
    wrap(underlying.flush(request))
  override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] =
    wrap(underlying.incrTweetBookmarkCount(request))
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD
@ -0,0 +1,15 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "tweetypie/servo/util",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
        "tco-util",
        "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext",
        "tweetypie/common/src/scala/com/twitter/tweetypie/util",
        "twitter-text/lib/java/src/main/java/com/twitter/twittertext",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala
@ -0,0 +1,11 @@
 package com.twitter.tweetypie.thriftscala.entities
 import com.twitter.tweetypie.thriftscala.CashtagEntity
 import com.twitter.tweetypie.tweettext.TextEntity
 object CashtagTextEntity extends TextEntity[CashtagEntity] {
  override def fromIndex(entity: CashtagEntity): Short = entity.fromIndex
  override def toIndex(entity: CashtagEntity): Short = entity.toIndex
  override def move(entity: CashtagEntity, fromIndex: Short, toIndex: Short): CashtagEntity =
    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala
@ -0,0 +1,118 @@
 package com.twitter.tweetypie.thriftscala.entities
 import com.twitter.servo.data.Mutation
 import com.twitter.tco_util.TcoUrl
 import com.twitter.tweetypie.thriftscala._
 import com.twitter.tweetypie.thriftscala.entities.Implicits._
 import com.twitter.tweetypie.tweettext.PartialHtmlEncoding
 import com.twitter.tweetypie.tweettext.TextEntity
 import com.twitter.tweetypie.tweettext.TextModification
 import com.twitter.tweetypie.util.TweetLenses
 import com.twitter.twittertext.Extractor
 import scala.collection.JavaConverters._
 /**
 * Contains functions to collect urls, mentions, hashtags, and cashtags from the text of tweets and messages
 */
 object EntityExtractor {
  // We only use one configuration of com.twitter.twittertext.Extractor, so it's
  // OK to share one global reference. The only available
  // configuration option is whether to extract URLs without protocols
  // (defaults to true)
  private[this] val extractor = new Extractor
  // The twitter-text library operates on unencoded text, but we store
  // and process HTML-encoded text. The TextModification returned
  // from this function contains the decoded text which we will operate on,
  // but also provides us with the ability to map the indices on
  // the twitter-text entities back to the entities on the encoded text.
  private val htmlEncodedTextToEncodeModification: String => TextModification =
    text =>
      PartialHtmlEncoding
        .decodeWithModification(text)
        .getOrElse(TextModification.identity(text))
        .inverse
  private[this] val extractAllUrlsFromTextMod: TextModification => Seq[UrlEntity] =
    extractUrls(false)
  val extractAllUrls: String => Seq[UrlEntity] =
    htmlEncodedTextToEncodeModification.andThen(extractAllUrlsFromTextMod)
  private[this] val extractTcoUrls: TextModification => Seq[UrlEntity] =
    extractUrls(true)
  private[this] def extractUrls(tcoOnly: Boolean): TextModification => Seq[UrlEntity] =
    mkEntityExtractor[UrlEntity](
      extractor.extractURLsWithIndices(_).asScala.filter { e =>
        if (tcoOnly) TcoUrl.isTcoUrl(e.getValue) else true
      },
      UrlEntity(_, _, _)
    )
  private[this] val extractMentionsFromTextMod: TextModification => Seq[MentionEntity] =
    mkEntityExtractor[MentionEntity](
      extractor.extractMentionedScreennamesWithIndices(_).asScala,
      MentionEntity(_, _, _)
    )
  val extractMentions: String => Seq[MentionEntity] =
    htmlEncodedTextToEncodeModification.andThen(extractMentionsFromTextMod)
  private[this] val extractHashtagsFromTextMod: TextModification => Seq[HashtagEntity] =
    mkEntityExtractor[HashtagEntity](
      extractor.extractHashtagsWithIndices(_).asScala,
      HashtagEntity(_, _, _)
    )
  val extractHashtags: String => Seq[HashtagEntity] =
    htmlEncodedTextToEncodeModification.andThen(extractHashtagsFromTextMod)
  private[this] val extractCashtagsFromTextMod: TextModification => Seq[CashtagEntity] =
    mkEntityExtractor[CashtagEntity](
      extractor.extractCashtagsWithIndices(_).asScala,
      CashtagEntity(_, _, _)
    )
  val extractCashtags: String => Seq[CashtagEntity] =
    htmlEncodedTextToEncodeModification.andThen(extractCashtagsFromTextMod)
  private[this] def mkEntityExtractor[E: TextEntity](
    extract: String => Seq[Extractor.Entity],
    construct: (Short, Short, String) => E
  ): TextModification => Seq[E] =
    htmlEncodedMod => {
      val convert: Extractor.Entity => Option[E] =
        e =>
          for {
            start <- asShort(e.getStart.intValue)
            end <- asShort(e.getEnd.intValue)
            if e.getValue != null
            res <- htmlEncodedMod.reindexEntity(construct(start, end, e.getValue))
          } yield res
      val entities = extract(htmlEncodedMod.original)
      extractor.modifyIndicesFromUTF16ToUnicode(htmlEncodedMod.original, entities.asJava)
      entities.map(convert).flatten
    }
  private[this] def asShort(i: Int): Option[Short] =
    if (i.isValidShort) Some(i.toShort) else None
  private[this] def mutation(extractUrls: Boolean): Mutation[Tweet] =
    Mutation { tweet =>
      val htmlEncodedMod = htmlEncodedTextToEncodeModification(TweetLenses.text.get(tweet))
      Some(
        tweet.copy(
          urls = if (extractUrls) Some(extractTcoUrls(htmlEncodedMod)) else tweet.urls,
          mentions = Some(extractMentionsFromTextMod(htmlEncodedMod)),
          hashtags = Some(extractHashtagsFromTextMod(htmlEncodedMod)),
          cashtags = Some(extractCashtagsFromTextMod(htmlEncodedMod))
        )
      )
    }
  val mutationWithoutUrls: Mutation[Tweet] = mutation(false)
  val mutationAll: Mutation[Tweet] = mutation(true)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala
@ -0,0 +1,11 @@
 package com.twitter.tweetypie.thriftscala.entities
 import com.twitter.tweetypie.thriftscala.HashtagEntity
 import com.twitter.tweetypie.tweettext.TextEntity
 object HashtagTextEntity extends TextEntity[HashtagEntity] {
  override def fromIndex(entity: HashtagEntity): Short = entity.fromIndex
  override def toIndex(entity: HashtagEntity): Short = entity.toIndex
  override def move(entity: HashtagEntity, fromIndex: Short, toIndex: Short): HashtagEntity =
    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala
@ -0,0 +1,10 @@
 package com.twitter.tweetypie.thriftscala.entities
 object Implicits {
  implicit val hashtagTextEntity: HashtagTextEntity.type = HashtagTextEntity
  implicit val cashtagTextEntity: CashtagTextEntity.type = CashtagTextEntity
  implicit val mentionTextEntity: MentionTextEntity.type = MentionTextEntity
  implicit val urlTextEntity: UrlTextEntity.type = UrlTextEntity
  implicit val mediaTextEntity: MediaTextEntity.type = MediaTextEntity
  implicit val textRangeTextEntity: TextRangeEntityAdapter.type = TextRangeEntityAdapter
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala
@ -0,0 +1,11 @@
 package com.twitter.tweetypie.thriftscala.entities
 import com.twitter.tweetypie.thriftscala.MediaEntity
 import com.twitter.tweetypie.tweettext.TextEntity
 object MediaTextEntity extends TextEntity[MediaEntity] {
  override def fromIndex(entity: MediaEntity): Short = entity.fromIndex
  override def toIndex(entity: MediaEntity): Short = entity.toIndex
  override def move(entity: MediaEntity, fromIndex: Short, toIndex: Short): MediaEntity =
    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala
@ -0,0 +1,11 @@
 package com.twitter.tweetypie.thriftscala.entities
 import com.twitter.tweetypie.thriftscala.MentionEntity
 import com.twitter.tweetypie.tweettext.TextEntity
 object MentionTextEntity extends TextEntity[MentionEntity] {
  override def fromIndex(entity: MentionEntity): Short = entity.fromIndex
  override def toIndex(entity: MentionEntity): Short = entity.toIndex
  override def move(entity: MentionEntity, fromIndex: Short, toIndex: Short): MentionEntity =
    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala
@ -0,0 +1,11 @@
 package com.twitter.tweetypie.thriftscala.entities
 import com.twitter.tweetypie.thriftscala.TextRange
 import com.twitter.tweetypie.tweettext.TextEntity
 object TextRangeEntityAdapter extends TextEntity[TextRange] {
  override def fromIndex(entity: TextRange): Short = entity.fromIndex.toShort
  override def toIndex(entity: TextRange): Short = entity.toIndex.toShort
  override def move(entity: TextRange, fromIndex: Short, toIndex: Short): TextRange =
    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala
@ -0,0 +1,11 @@
 package com.twitter.tweetypie.thriftscala.entities
 import com.twitter.tweetypie.thriftscala.UrlEntity
 import com.twitter.tweetypie.tweettext.TextEntity
 object UrlTextEntity extends TextEntity[UrlEntity] {
  override def fromIndex(entity: UrlEntity): Short = entity.fromIndex
  override def toIndex(entity: UrlEntity): Short = entity.toIndex
  override def move(entity: UrlEntity, fromIndex: Short, toIndex: Short): UrlEntity =
    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD
@ -0,0 +1,16 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    provides = scala_artifact(
        org = "com.twitter",
        name = "tweetypie-tweettext",
        repo = artifactory,
    ),
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/com/ibm/icu:icu4j",
        "twitter-text/lib/java/src/main/java/com/twitter/twittertext",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala
@ -0,0 +1,44 @@
 package com.twitter.tweetypie.tweettext
 import com.ibm.icu.text.BreakIterator
 /**
 * Adapt the [[BreakIterator]] interface to a scala [[Iterator]]
 * over the offsets of user-perceived characters in a String.
 */
 object GraphemeIndexIterator {
  /**
   * Produce an iterator over indices in the string that mark the end
   * of a user-perceived character (grapheme)
   */
  def ends(s: String): Iterator[Offset.CodeUnit] =
    // The start of every grapheme but the first is also a grapheme
    // end. The last grapheme ends at the end of the string.
    starts(s).drop(1) ++ Iterator(Offset.CodeUnit.length(s))
  /**
   * Produce an iterator over indices in the string that mark the start
   * of a user-perceived character (grapheme)
   */
  def starts(s: String): Iterator[Offset.CodeUnit] =
    new Iterator[Offset.CodeUnit] {
      private[this] val it = BreakIterator.getCharacterInstance()
      it.setText(s)
      override def hasNext: Boolean = it.current < s.length
      override def next: Offset.CodeUnit = {
        if (!hasNext) throw new IllegalArgumentException(s"${it.current()}, ${s.length}")
        // No matter what, we will be returning the value of `current`,
        // which is the index of the start of the next grapheme.
        val result = it.current()
        it.next()
        Offset.CodeUnit(result)
      }
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala
@ -0,0 +1,85 @@
 package com.twitter.tweetypie.tweettext
 /**
 * An efficient converter of indices between code points and code units.
 */
 class IndexConverter(text: String) {
  // Keep track of a single corresponding pair of code unit and code point
  // offsets so that we can re-use counting work if the next requested
  // entity is near the most recent entity.
  private var codePointIndex = 0
  // The code unit index should never split a surrogate pair.
  private var charIndex = 0
  /**
   * @param offset Index into the string measured in code units.
   * @return The code point index that corresponds to the specified character index.
   */
  def toCodePoints(offset: Offset.CodeUnit): Offset.CodePoint =
    Offset.CodePoint(codeUnitsToCodePoints(offset.toInt))
  /**
   * @param charIndex Index into the string measured in code units.
   * @return The code point index that corresponds to the specified character index.
   */
  def codeUnitsToCodePoints(charIndex: Int): Int = {
    if (charIndex < this.charIndex) {
      this.codePointIndex -= text.codePointCount(charIndex, this.charIndex)
    } else {
      this.codePointIndex += text.codePointCount(this.charIndex, charIndex)
    }
    this.charIndex = charIndex
    // Make sure that charIndex never points to the second code unit of a
    // surrogate pair.
    if (charIndex > 0 && Character.isSupplementaryCodePoint(text.codePointAt(charIndex - 1))) {
      this.charIndex -= 1
      this.codePointIndex -= 1
    }
    this.codePointIndex
  }
  /**
   * @param offset Index into the string measured in code points.
   * @return the corresponding code unit index
   */
  def toCodeUnits(offset: Offset.CodePoint): Offset.CodeUnit = {
    this.charIndex = text.offsetByCodePoints(charIndex, offset.toInt - this.codePointIndex)
    this.codePointIndex = offset.toInt
    Offset.CodeUnit(this.charIndex)
  }
  /**
   * @param codePointIndex Index into the string measured in code points.
   * @return the corresponding code unit index
   */
  def codePointsToCodeUnits(codePointIndex: Int): Int =
    toCodeUnits(Offset.CodePoint(codePointIndex)).toInt
  /**
   * Returns a substring which begins at the specified code point `from` and extends to the
   * code point `to`. Since String.substring only works with character, the method first
   * converts code point offset to code unit offset.
   */
  def substring(from: Offset.CodePoint, to: Offset.CodePoint): String =
    text.substring(toCodeUnits(from).toInt, toCodeUnits(to).toInt)
  /**
   * Returns a substring which begins at the specified code point `from` and extends to the
   * code point `to`. Since String.substring only works with character, the method first
   * converts code point offset to code unit offset.
   */
  def substringByCodePoints(from: Int, to: Int): String =
    substring(Offset.CodePoint(from), Offset.CodePoint(to))
  /**
   * Returns a substring which begins at the specified code point `from` and extends to the
   * end of the string. Since String.substring only works with character, the method first
   * converts code point offset to code unit offset.
   */
  def substringByCodePoints(from: Int): String = {
    val charFrom = codePointsToCodeUnits(from)
    text.substring(charFrom)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala
@ -0,0 +1,253 @@
 package com.twitter.tweetypie.tweettext
 import scala.collection.immutable
 /**
 * An Offset is a typed index into a String.
 */
 trait Offset[T] extends Ordering[T] {
  def toInt(t: T): Int
  def count(text: String, start: Offset.CodeUnit, end: Offset.CodeUnit): T
  def compare(t1: T, t2: T): Int = toInt(t1).compare(toInt(t2))
  def length(input: String): T = count(input, Offset.CodeUnit(0), Offset.CodeUnit.length(input))
 }
 object Offset {
  /**
   * UTF-16 code unit offsets are the native offsets for Java/Scala
   * Strings.
   */
  case class CodeUnit(toInt: Int) extends AnyVal with Ordered[CodeUnit] {
    def compare(other: CodeUnit): Int = toInt.compare(other.toInt)
    def +(other: CodeUnit) = CodeUnit(toInt + other.toInt)
    def -(other: CodeUnit) = CodeUnit(toInt - other.toInt)
    def min(other: CodeUnit): CodeUnit = if (toInt < other.toInt) this else other
    def max(other: CodeUnit): CodeUnit = if (toInt > other.toInt) this else other
    def incr: CodeUnit = CodeUnit(toInt + 1)
    def decr: CodeUnit = CodeUnit(toInt - 1)
    def until(end: CodeUnit): immutable.IndexedSeq[CodeUnit] =
      toInt.until(end.toInt).map(CodeUnit(_))
    /**
     * Converts this `CodeUnit` to the equivalent `CodePoint` within the
     * given text.
     */
    def toCodePoint(text: String): CodePoint =
      CodePoint(text.codePointCount(0, toInt))
    def offsetByCodePoints(text: String, codePoints: CodePoint): CodeUnit =
      CodeUnit(text.offsetByCodePoints(toInt, codePoints.toInt))
  }
  implicit object CodeUnit extends Offset[CodeUnit] {
    def toInt(u: CodeUnit): Int = u.toInt
    override def length(text: String): CodeUnit = CodeUnit(text.length)
    def count(text: String, start: CodeUnit, end: CodeUnit): CodeUnit = end - start
  }
  /**
   * Offsets in whole Unicode code points. Any CodePoint is a valid
   * offset into the String as long as it is >= 0 and less than the
   * number of code points in the string.
   */
  case class CodePoint(toInt: Int) extends AnyVal with Ordered[CodePoint] {
    def toShort: Short = toInt.toShort
    def compare(other: CodePoint): Int = toInt.compare(other.toInt)
    def +(other: CodePoint) = CodePoint(toInt + other.toInt)
    def -(other: CodePoint) = CodePoint(toInt - other.toInt)
    def min(other: CodePoint): CodePoint = if (toInt < other.toInt) this else other
    def max(other: CodePoint): CodePoint = if (toInt > other.toInt) this else other
    def until(end: CodePoint): immutable.IndexedSeq[CodePoint] =
      toInt.until(end.toInt).map(CodePoint(_))
    def toCodeUnit(text: String): CodeUnit =
      CodeUnit(text.offsetByCodePoints(0, toInt))
  }
  implicit object CodePoint extends Offset[CodePoint] {
    def toInt(p: CodePoint): Int = p.toInt
    def count(text: String, start: CodeUnit, end: CodeUnit): CodePoint =
      CodePoint(text.codePointCount(start.toInt, end.toInt))
  }
  /**
   * Offsets into the String as if the String were encoded as UTF-8. You
   * cannot use a [[Utf8]] offset to index a String, because not all
   * Utf8 indices are valid indices into the String.
   */
  case class Utf8(toInt: Int) extends AnyVal with Ordered[Utf8] {
    def compare(other: Utf8): Int = toInt.compare(other.toInt)
    def +(other: Utf8) = Utf8(toInt + other.toInt)
    def -(other: Utf8) = Utf8(toInt - other.toInt)
    def min(other: Utf8): Utf8 = if (toInt < other.toInt) this else other
    def max(other: Utf8): Utf8 = if (toInt > other.toInt) this else other
  }
  implicit object Utf8 extends Offset[Utf8] {
    def toInt(u: Utf8): Int = u.toInt
    /**
     * Count how many bytes this section of text would be when encoded as
     * UTF-8.
     */
    def count(s: String, start: CodeUnit, end: CodeUnit): Utf8 = {
      def go(i: CodeUnit, byteLength: Utf8): Utf8 =
        if (i < end) {
          val cp = s.codePointAt(i.toInt)
          go(i + CodeUnit(Character.charCount(cp)), byteLength + forCodePoint(cp))
        } else {
          byteLength
        }
      go(start, Utf8(0))
    }
    /**
     * Unfortunately, there is no convenient API for finding out how many
     * bytes a unicode code point would take in UTF-8, so we have to
     * explicitly calculate it.
     *
     * @see http://en.wikipedia.org/wiki/UTF-8#Description
     */
    def forCodePoint(cp: Int): Utf8 =
      Utf8 {
        // if the code point is an unpaired surrogate, it will be converted
        // into a 1 byte replacement character
        if (Character.getType(cp) == Character.SURROGATE) 1
        else {
          cp match {
            case _ if cp < 0x80 => 1
            case _ if cp < 0x800 => 2
            case _ if cp < 0x10000 => 3
            case _ => 4
          }
        }
      }
  }
  /**
   * Display units count what we consider a "character" in a
   * Tweet. [[DisplayUnit]] offsets are only valid for text that is
   * NFC-normalized (See: http://www.unicode.org/reports/tr15) and
   * HTML-encoded, though this interface cannot enforce that.
   *
   * Currently, a [[DisplayUnit]] is equivalent to a single Unicode code
   * point combined with treating "&lt;", "&gt;", and "&amp;" each as a
   * single character (since they are displayed as '<', '>', and '&'
   * respectively). This implementation is not directly exposed.
   *
   * It should be possible to change this definition without breaking
   * code that uses the [[DisplayUnit]] interface e.g. to count
   * user-perceived characters (graphemes) rather than code points,
   * though any change has to be made in concert with changing the
   * mobile client and Web implementations so that the user experience
   * of character counting remains consistent.
   */
  case class DisplayUnit(toInt: Int) extends AnyVal with Ordered[DisplayUnit] {
    def compare(other: DisplayUnit): Int = toInt.compare(other.toInt)
    def +(other: DisplayUnit) = DisplayUnit(toInt + other.toInt)
    def -(other: DisplayUnit) = DisplayUnit(toInt - other.toInt)
    def min(other: DisplayUnit): DisplayUnit = if (toInt < other.toInt) this else other
    def max(other: DisplayUnit): DisplayUnit = if (toInt > other.toInt) this else other
  }
  implicit object DisplayUnit extends Offset[DisplayUnit] {
    def toInt(d: DisplayUnit): Int = d.toInt
    /**
     * Returns the number of display units in the specified range of the
     * given text.  See [[DisplayUnit]] for a descrption of what we
     * consider a display unit.
     *
     * The input string should already be NFC normalized to get
     * consistent results.  If partially html encoded, it will correctly
     * count html entities as a single display unit.
     *
     * @param text the string containing the characters to count.
     * @param the index to the first char of the text range
     * @param the index after the last char of the text range.
     */
    def count(text: String, start: CodeUnit, end: CodeUnit): DisplayUnit = {
      val stop = end.min(CodeUnit.length(text))
      @annotation.tailrec
      def go(offset: CodeUnit, total: DisplayUnit): DisplayUnit =
        if (offset >= stop) total
        else go(offset + at(text, offset), total + DisplayUnit(1))
      go(start, DisplayUnit(0))
    }
    /**
     * Return the length of the display unit at the specified offset in
     * the (NFC-normalized, HTML-encoded) text.
     */
    def at(text: String, offset: CodeUnit): CodeUnit =
      CodeUnit {
        text.codePointAt(offset.toInt) match {
          case '&' =>
            if (text.regionMatches(offset.toInt, "&amp;", 0, 5)) 5
            else if (text.regionMatches(offset.toInt, "&lt;", 0, 4)) 4
            else if (text.regionMatches(offset.toInt, "&gt;", 0, 4)) 4
            else 1
          case cp => Character.charCount(cp)
        }
      }
  }
  /**
   * Ranges of offsets, useful for avoiding slicing entities.
   */
  sealed trait Ranges[T] {
    def contains(t: T): Boolean
  }
  object Ranges {
    private[this] case class Impl[T](toSeq: Seq[(T, T)])(implicit off: Offset[T])
        extends Ranges[T] {
      def contains(t: T): Boolean = toSeq.exists { case (lo, hi) => off.gt(t, lo) && off.lt(t, hi) }
    }
    /**
     * Non-inclusive range of offsets (matches values that are strictly
     * between `hi` and `lo`)
     */
    def between[T](lo: T, hi: T)(implicit off: Offset[T]): Ranges[T] =
      if (off.toInt(hi) > off.toInt(lo) + 1 && off.toInt(lo) < Int.MaxValue) Impl(Seq((lo, hi)))
      else Impl(Nil)
    /**
     * The union of all of the specified ranges.
     */
    def all[T](ranges: Seq[Ranges[T]])(implicit off: Offset[T]): Ranges[T] =
      Impl(
        // Preprocess the ranges so that each contains check is as cheap
        // as possible.
        ranges
          .flatMap { case r: Impl[T] => r.toSeq }
          .sortBy(_._1)
          .foldLeft(Nil: List[(T, T)]) {
            case ((a, b) :: out, (c, d)) if off.lt(c, b) => (a, d) :: out
            case (out, r) => r :: out
          }
      )
    def Empty[T: Offset]: Ranges[T] = Impl[T](Nil)
    private[this] val HtmlEscapes = """&(?:amp|lt|gt);""".r
    /**
     * Match [[CodeUnit]]s that would split a HTML entity.
     */
    def htmlEntities(s: String): Ranges[CodeUnit] = {
      val it = HtmlEscapes.findAllIn(s)
      all(it.map(_ => between(CodeUnit(it.start), CodeUnit(it.end))).toSeq)
    }
    def fromCodePointPairs(pairs: Seq[(Int, Int)]): Ranges[CodePoint] =
      all(pairs.map { case (lo, hi) => between(CodePoint(lo), CodePoint(hi)) })
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala
@ -0,0 +1,55 @@
 package com.twitter.tweetypie.tweettext
 /**
 * Code used to convert raw user-provided text into an allowable form.
 */
 object PartialHtmlEncoding {
  /**
   * Replaces all `<`, `>`, and '&' chars with "&lt;", "&gt;", and "&amp;", respectively.
   *
   * Tweet text is HTML-encoded at tweet creation time, and is stored and processed in encoded form.
   */
  def encode(text: String): String = {
    val buf = new StringBuilder
    text.foreach {
      case '<' => buf.append("&lt;")
      case '>' => buf.append("&gt;")
      case '&' => buf.append("&amp;")
      case c => buf.append(c)
    }
    buf.toString
  }
  private val AmpLtRegex = "&lt;".r
  private val AmpGtRegex = "&gt;".r
  private val AmpAmpRegex = "&amp;".r
  private val partialHtmlDecoder: (String => String) =
    ((s: String) => AmpLtRegex.replaceAllIn(s, "<"))
      .andThen(s => AmpGtRegex.replaceAllIn(s, ">"))
      .andThen(s => AmpAmpRegex.replaceAllIn(s, "&"))
  /**
   * The opposite of encode, it replaces all "&lt;", "&gt;", and "&amp;" with
   * `<`, `>`, and '&', respectively.
   */
  def decode(text: String): String =
    decodeWithModification(text) match {
      case Some(mod) => mod.updated
      case None => text
    }
  /**
   * Decodes encoded entities, and returns a `TextModification` if the text was modified.
   */
  def decodeWithModification(text: String): Option[TextModification] =
    TextModification.replaceAll(
      text,
      AmpLtRegex -> "<",
      AmpGtRegex -> ">",
      AmpAmpRegex -> "&"
    )
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala
@ -0,0 +1,251 @@
 package com.twitter.tweetypie.tweettext
 import scala.util.matching.Regex
 /**
 * Code used to convert raw user-provided text into an allowable form.
 */
 object Preprocessor {
  import TweetText._
  import TextModification.replaceAll
  /**
   * Regex for dos-style line endings.
   */
  val DosLineEndingRegex: Regex = """\r\n""".r
  /**
   * Converts \r\n to just \n.
   */
  def normalizeNewlines(text: String): String =
    DosLineEndingRegex.replaceAllIn(text, "\n")
  /**
   * Characters to strip out of tweet text at write-time.
   */
  val unicodeCharsToStrip: Seq[Char] =
    Seq(
      '\uFFFE', '\uFEFF', // BOM
      '\uFFFF', // Special
      '\u200E', '\u200F', // ltr, rtl
      '\u202A', '\u202B', '\u202C', '\u202D', '\u202E', // Directional change
      '\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008',
      '\u0009', '\u000B', '\u000C', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013',
      '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C',
      '\u001D', '\u001E', '\u001F', '\u007F',
      '\u2065',
    )
  val UnicodeCharsToStripRegex: Regex = unicodeCharsToStrip.mkString("[", "", "]").r
  /**
   * Strips out control characters and other non-textual unicode chars that can break xml and/or
   * json rendering, or be used for exploits.
   */
  def stripControlCharacters(text: String): String =
    UnicodeCharsToStripRegex.replaceAllIn(text, "")
  val Tweetypie674UnicodeSequence: String =
    "\u0633\u0645\u064e\u0640\u064e\u0651\u0648\u064f\u0648\u064f\u062d\u062e " +
      "\u0337\u0334\u0310\u062e \u0337\u0334\u0310\u062e \u0337\u0334\u0310\u062e " +
      "\u0627\u0645\u0627\u0631\u062a\u064a\u062e \u0337\u0334\u0310\u062e"
  val Tweetypie674UnicodeRegex: Regex = Tweetypie674UnicodeSequence.r
  /**
   * Replace each `Tweetypie674UnicodeSequence` of this string to REPLACEMENT
   * CHARACTER.
   *
   * Apple has a bug in its CoreText library. This aims to prevent
   * ios clients from being crashed when a tweet contains the specific
   * unicode sequence.
   */
  def avoidCoreTextBug(text: String): String =
    Tweetypie674UnicodeRegex.replaceAllIn(text, "\ufffd")
  /**
   * Replace each `Tweetypie674UnicodeSequence` of this string to a REPLACEMENT
   * CHARACTER, returns a TextModification object that provides information
   * to also update entity indices.
   */
  def replaceCoreTextBugModification(text: String): Option[TextModification] =
    replaceAll(text, Tweetypie674UnicodeRegex, "\ufffd")
  private val preprocessor: String => String =
    ((s: String) => nfcNormalize(s))
      .andThen(stripControlCharacters _)
      .andThen(trimBlankCharacters _)
      .andThen(normalizeNewlines _)
      .andThen(collapseBlankLines _)
      .andThen(avoidCoreTextBug _)
  /**
   * Performs the text modifications that are necessary in the write-path before extracting URLs.
   */
  def preprocessText(text: String): String =
    preprocessor(text)
  /**
   * Replaces all `<`, `>`, and '&' chars with "&lt;", "&gt;", and "&amp;", respectively.
   *
   * The original purpose of this was presumably to prevent script injections when
   * displaying tweets without proper escaping.  Currently, tweets are encoded before
   * they are stored in the database.
   *
   * Note that the pre-escaping of & < and > also happens in the rich text editor in javascript
   */
  def partialHtmlEncode(text: String): String =
    PartialHtmlEncoding.encode(text)
  /**
   * The opposite of partialHtmlEncode, it replaces all "&lt;", "&gt;", and "&amp;" with
   * `<`, `>`, and '&', respectively.
   */
  def partialHtmlDecode(text: String): String =
    PartialHtmlEncoding.decode(text)
  /**
   *
   * Detects all forms of whitespace, considering as whitespace the following:
   * This regex detects characters that always or often are rendered as blank space. We use
   * this to prevent users from inserting excess blank lines and from tweeting effectively
   * blank tweets.
   *
   * Note that these are not all semantically "whitespace", so this regex should not be used
   * to process non-blank text, e.g. to separate words.
   *
   * Codepoints below and the `\p{Z}` regex character property alias are defined in the Unicode
   * Character Database (UCD) at https://unicode.org/ucd/ and https://unicode.org/reports/tr44/
   *
   * The `\p{Z}` regex character property alias is defined specifically in UCD as:
   *
   * Zs |	Space_Separator	    | a space character (of various non-zero widths)
   * Zl	| Line_Separator	    | U+2028 LINE SEPARATOR only
   * Zp	| Paragraph_Separator	| U+2029 PARAGRAPH SEPARATOR only
   * Z	| Separator	          | Zs | Zl | Zp
   * ref: https://unicode.org/reports/tr44/#GC_Values_Table
   *
   *  U+0009  Horizontal Tab (included in \s)
   *  U+000B  Vertical Tab (included in \s)
   *  U+000C  Form feed  (included in \s)
   *  U+000D  Carriage return  (included in \s)
   *  U+0020  space  (included in \s)
   *  U+0085  Next line (included in \u0085)
   *  U+061C  arabic letter mark (included in \u061C)
   *  U+00A0  no-break space (included in \p{Z})
   *  U+00AD  soft-hyphen marker (included in \u00AD)
   *  U+1680  ogham space mark (included in \p{Z})
   *  U+180E  mongolian vowel separator (included in \p{Z} on jdk8 and included in \u180E on jdk11)
   *  U+2000  en quad (included in \p{Z})
   *  U+2001  em quad (included in \p{Z})
   *  U+2002  en space (included in \p{Z})
   *  U+2003  em space (included in \p{Z})
   *  U+2004  three-per-em space (included in \p{Z})
   *  U+2005  four-per-em space (included in \p{Z})
   *  U+2006  six-per-em space (included in \p{Z})
   *  U+2007  figure space (included in \p{Z})
   *  U+2008  punctuation space (included in \p{Z})
   *  U+2009  thin space (included in \p{Z})
   *  U+200A  hair space (included in \p{Z})
   *  U+200B  zero-width (included in \u200B-\u200D)
   *  U+200C  zero-width non-joiner  (included in \u200B-\u200D)
   *  U+200D  zero-width joiner (included in \u200B-\u200D)
   *  U+2028  line separator (included in \p{Z})
   *  U+2029  paragraph separator (included in \p{Z})
   *  U+202F  narrow no-break space (included in \p{Z})
   *  U+205F  medium mathematical space (included in \p{Z})
   *  U+2061  function application (included in \u2061-\u2064)
   *  U+2062  invisible times (included in \u2061-\u2064)
   *  U+2063  invisible separator (included in \u2061-\u2064)
   *  U+2064  invisible plus (included in \u2061-\u2064)
   *  U+2066  left-to-right isolate (included in \u2066-\u2069)
   *  U+2067  right-to-left isolate (included in \u2066-\u2069)
   *  U+2068  first strong isolate (included in \u2066-\u2069)
   *  U+2069  pop directional isolate (included in \u2066-\u2069)
   *  U+206A  inhibit symmetric swapping (included in \u206A-\u206F)
   *  U+206B  activate symmetric swapping (included in \u206A-\u206F)
   *  U+206C  inhibit arabic form shaping (included in \u206A-\u206F)
   *  U+206D  activate arabic form shaping (included in \u206A-\u206F)
   *  U+206E  national digit shapes (included in \u206A-\u206F)
   *  U+206F  nominal digit shapes (included in \u206A-\u206F)
   *  U+2800  braille pattern blank (included in \u2800)
   *  U+3164  hongul filler (see UCD Ignorable_Code_Point)
   *  U+FFA0  halfwidth hongul filler (see UCD Ignorable_Code_Point)
   *  U+3000  ideographic space (included in \p{Z})
   *  U+FEFF  zero-width no-break space (explicitly included in \uFEFF)
   */
  val BlankTextRegex: Regex =
    """[\s\p{Z}\u180E\u0085\u00AD\u061C\u200B-\u200D\u2061-\u2064\u2066-\u2069\u206A-\u206F\u2800\u3164\uFEFF\uFFA0]*""".r
  /**
   * Some of the above blank characters are valid at the start of a Tweet (and irrelevant at the end)
   * such as characters that change the direction of text. When trimming from the start
   * or end of text we use a smaller set of characters
   */
  val BlankWhenLeadingOrTrailingRegex: Regex = """[\s\p{Z}\u180E\u0085\u200B\uFEFF]*""".r
  /**
   * Matches consecutive blanks, starting at a newline.
   */
  val ConsecutiveBlankLinesRegex: Regex = ("""\n(""" + BlankTextRegex + """\n){2,}""").r
  val LeadingBlankCharactersRegex: Regex = ("^" + BlankWhenLeadingOrTrailingRegex).r
  val TrailingBlankCharactersRegex: Regex = (BlankWhenLeadingOrTrailingRegex + "$").r
  /**
   * Is the given text empty or contains nothing but whitespace?
   */
  def isBlank(text: String): Boolean =
    BlankTextRegex.pattern.matcher(text).matches()
  /**
   * See http://confluence.local.twitter.com/display/PROD/Displaying+line+breaks+in+Tweets
   *
   * Collapses consecutive blanks lines down to a single blank line.  We can assume that
   * all newlines have already been normalized to just \n, so we don't have to worry about
   * \r\n.
   */
  def collapseBlankLinesModification(text: String): Option[TextModification] =
    replaceAll(text, ConsecutiveBlankLinesRegex, "\n\n")
  def collapseBlankLines(text: String): String =
    ConsecutiveBlankLinesRegex.replaceAllIn(text, "\n\n")
  def trimBlankCharacters(text: String): String =
    TrailingBlankCharactersRegex.replaceFirstIn(
      LeadingBlankCharactersRegex.replaceFirstIn(text, ""),
      ""
    )
  /** Characters that are not visible on their own. Some of these are used in combination with
   * other visible characters, and therefore cannot be always stripped from tweets.
   */
  private[tweettext] val InvisibleCharacters: Seq[Char] =
    Seq(
      '\u2060', '\u2061', '\u2062', '\u2063', '\u2064', '\u206A', '\u206B', '\u206C', '\u206D',
      '\u206D', '\u206E', '\u206F', '\u200C',
      '\u200D', // non-printing chars with valid use in Arabic
      '\u2009', '\u200A', '\u200B', // include very skinny spaces too
      '\ufe00', '\ufe01', '\ufe02', '\ufe03', '\ufe04', '\ufe05', '\ufe06', '\ufe07', '\ufe08',
      '\ufe09', '\ufe0A', '\ufe0B', '\ufe0C', '\ufe0D', '\ufe0E', '\ufe0F',
    )
  private[tweetypie] val InvisibleUnicodePattern: Regex =
    ("^[" + InvisibleCharacters.mkString + "]+$").r
  def isInvisibleChar(input: Char): Boolean = {
    InvisibleCharacters contains input
  }
  /** If string is only "invisible characters", replace full string with whitespace.
   * The purpose of this method is to remove invisible characters when ONLY invisible characters
   * appear between two urls, which can be a security vulnerability due to misleading behavior. These
   * characters cannot be removed as a rule applied to the tweet, because they are used in
   * conjuction with other characters.
   */
  def replaceInvisiblesWithWhitespace(text: String): String = {
    text match {
      case invisible @ InvisibleUnicodePattern() => " " * TweetText.codePointLength(invisible)
      case other => other
    }
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala
@ -0,0 +1,24 @@
 package com.twitter.tweetypie.tweettext
 /**
 * A type class for entities found within a piece of tweet text.
 */
 trait TextEntity[T] {
  def fromIndex(entity: T): Short
  def toIndex(entity: T): Short
  def move(entity: T, fromIndex: Short, toIndex: Short): T
 }
 object TextEntity {
  def fromIndex[T: TextEntity](entity: T): Short =
    implicitly[TextEntity[T]].fromIndex(entity)
  def toIndex[T: TextEntity](entity: T): Short =
    implicitly[TextEntity[T]].toIndex(entity)
  def move[T: TextEntity](entity: T, fromIndex: Short, toIndex: Short): T =
    implicitly[TextEntity[T]].move(entity, fromIndex, toIndex)
  def shift[T: TextEntity](entity: T, offset: Short): T =
    move(entity, (fromIndex(entity) + offset).toShort, (toIndex(entity) + offset).toShort)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala
@ -0,0 +1,232 @@
 package com.twitter.tweetypie.tweettext
 import scala.util.matching.Regex
 object TextModification {
  /**
   * Lift a text into a TextModification where `original` and `updated` text are the same
   * and `replacements` is empty.
   */
  def identity(text: String): TextModification =
    TextModification(original = text, updated = text, replacements = Nil)
  /**
   * Replace each substring that matches the regex with the substitution string, returns a
   * TextModification object that contains the updated text and enough information to also
   * update entity indices.
   *
   * This method should correctly be taking into account surrogate-pairs.  The returned
   * TextModification object has code-point offsets, instead of code-unit offsets.
   */
  def replaceAll(text: String, regex: Regex, substitution: String): Option[TextModification] =
    replaceAll(text, regex -> substitution)
  /**
   * Replaces substrings that match the given `Regex` with the corresonding substitution
   * string.  Returns a `TextModification` that can be used to reindex entities.
   */
  def replaceAll(
    text: String,
    regexAndSubstitutions: (Regex, String)*
  ): Option[TextModification] = {
    val matches =
      (for {
        (r, s) <- regexAndSubstitutions
        m <- r.findAllIn(text).matchData
      } yield (m, s)).sortBy { case (m, _) => m.start }
    if (matches.isEmpty) {
      // no match found, return None to indicate no modifications made
      None
    } else {
      val replacements = List.newBuilder[TextReplacement]
      val indexConverter = new IndexConverter(text)
      // contains the retained text, built up as we walk through the regex matches
      val buf = new StringBuilder(text.length)
      // the number of code-points copied into buf
      var codePointsCopied = Offset.CodePoint(0)
      // always holds the start code-unit offset to copy to buf when we encounter
      // either a regex match or end-of-string.
      var anchor = 0
      import indexConverter.toCodePoints
      for ((m, sub) <- matches) {
        val unchangedText = text.substring(anchor, m.start)
        val unchangedLen = Offset.CodePoint.length(unchangedText)
        val subLen = Offset.CodePoint.length(sub)
        // copies the text upto the regex match run, plus the replacement string
        buf.append(unchangedText).append(sub)
        codePointsCopied += unchangedLen + subLen
        // the offsets indicate the indices of the matched string in the original
        // text, and the indices of the replacement string in the updated string
        replacements +=
          TextReplacement(
            originalFrom = toCodePoints(Offset.CodeUnit(m.start)),
            originalTo = toCodePoints(Offset.CodeUnit(m.end)),
            updatedFrom = codePointsCopied - subLen,
            updatedTo = codePointsCopied
          )
        anchor = m.end
      }
      buf.append(text.substring(anchor))
      Some(TextModification(text, buf.toString, replacements.result()))
    }
  }
  /**
   * Inserts a string at a specified code point offset.
   * Returns a `TextModification` that can be used to reindex entities.
   */
  def insertAt(
    originalText: String,
    insertAt: Offset.CodePoint,
    textToInsert: String
  ): TextModification = {
    val insertAtCodeUnit = insertAt.toCodeUnit(originalText).toInt
    val (before, after) = originalText.splitAt(insertAtCodeUnit)
    val updatedText = s"$before$textToInsert$after"
    val textToInsertLength = TweetText.codePointLength(textToInsert)
    TextModification(
      original = originalText,
      updated = updatedText,
      replacements = List(
        TextReplacement.fromCodePoints(
          originalFrom = insertAt.toInt,
          originalTo = insertAt.toInt,
          updatedFrom = insertAt.toInt,
          updatedTo = insertAt.toInt + textToInsertLength
        ))
    )
  }
 }
 /**
 * Encodes information about insertions/deletions/replacements made to a string, providing
 * the original string, the updated string, and a list of TextReplacement objects
 * that encode the indices of the segments that were changed.  Using this information,
 * it is possible to map an offset into the original string to an offset into the updated
 * string, assuming the text at the offset was not within one of the modified segments.
 *
 * All offsets are code-points, not UTF6 code-units.
 */
 case class TextModification(
  original: String,
  updated: String,
  replacements: List[TextReplacement]) {
  private val originalLen = Offset.CodePoint.length(original)
  /**
   * Using an offset into the original String, computes the equivalent offset into the updated
   * string.  If the offset falls within a segment that was removed/replaced, None is returned.
   */
  def reindex(index: Offset.CodePoint): Option[Offset.CodePoint] =
    reindex(index, Offset.CodePoint(0), replacements)
  /**
   * Reindexes an entity of type T.  Returns the updated entity, or None if either the `fromIndex`
   * or `toIndex` value is now out of range.
   */
  def reindexEntity[T: TextEntity](e: T): Option[T] =
    for {
      from <- reindex(Offset.CodePoint(TextEntity.fromIndex(e)))
      to <- reindex(Offset.CodePoint(TextEntity.toIndex(e) - 1))
    } yield TextEntity.move(e, from.toShort, (to.toShort + 1).toShort)
  /**
   * Reindexes a sequence of entities of type T.  Some entities could be filtered
   * out if they span a region of text that has been removed.
   */
  def reindexEntities[T: TextEntity](es: Seq[T]): Seq[T] =
    for (e <- es; e2 <- reindexEntity(e)) yield e2
  /**
   * Swaps `original` and `updated` text and inverts all `TextReplacement` instances.
   */
  def inverse: TextModification =
    TextModification(updated, original, replacements.map(_.inverse))
  // recursively walks through the list of TextReplacement objects computing
  // offsets to add/substract from 'shift', which accumulates all changes and
  // then gets added to index at the end.
  private def reindex(
    index: Offset.CodePoint,
    shift: Offset.CodePoint,
    reps: List[TextReplacement]
  ): Option[Offset.CodePoint] =
    reps match {
      case Nil =>
        if (index.toInt >= 0 && index <= originalLen)
          Some(index + shift)
        else
          None
      case (r @ TextReplacement(fr, to, _, _)) :: tail =>
        if (index < fr) Some(index + shift)
        else if (index < to) None
        else reindex(index, shift + r.lengthDelta, tail)
    }
 }
 object TextReplacement {
  def fromCodePoints(
    originalFrom: Int,
    originalTo: Int,
    updatedFrom: Int,
    updatedTo: Int
  ): TextReplacement =
    TextReplacement(
      Offset.CodePoint(originalFrom),
      Offset.CodePoint(originalTo),
      Offset.CodePoint(updatedFrom),
      Offset.CodePoint(updatedTo)
    )
 }
 /**
 * Encodes the indices of a segment of text in one string that maps to a replacement
 * segment in an updated version of the text.  The replacement segment could be empty
 * (updatedTo == updatedFrom), indicating the segment was removed.
 *
 * All offsets are code-points, not UTF16 code-units.
 *
 * `originalFrom` and `updatedFrom` are inclusive.
 * `originalTo` and `updatedTo` are exclusive.
 */
 case class TextReplacement(
  originalFrom: Offset.CodePoint,
  originalTo: Offset.CodePoint,
  updatedFrom: Offset.CodePoint,
  updatedTo: Offset.CodePoint) {
  def originalLength: Offset.CodePoint = originalTo - originalFrom
  def updatedLength: Offset.CodePoint = updatedTo - updatedFrom
  def lengthDelta: Offset.CodePoint = updatedLength - originalLength
  def shiftOriginal(offset: Offset.CodePoint): TextReplacement =
    copy(originalFrom = originalFrom + offset, originalTo = originalTo + offset)
  def shiftUpdated(offset: Offset.CodePoint): TextReplacement =
    copy(updatedFrom = updatedFrom + offset, updatedTo = updatedTo + offset)
  def shift(offset: Offset.CodePoint): TextReplacement =
    TextReplacement(
      originalFrom + offset,
      originalTo + offset,
      updatedFrom + offset,
      updatedTo + offset
    )
  def inverse: TextReplacement =
    TextReplacement(
      originalFrom = updatedFrom,
      originalTo = updatedTo,
      updatedFrom = originalFrom,
      updatedTo = originalTo
    )
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala
@ -0,0 +1,159 @@
 package com.twitter.tweetypie.tweettext
 import com.twitter.tweetypie.tweettext.TweetText._
 import com.twitter.twittertext.Extractor
 import java.lang.Character
 import scala.annotation.tailrec
 import scala.collection.JavaConverters._
 object Truncator {
  val Ellipsis = "\u2026"
  /**
   * Truncate tweet text for a retweet. If the text is longer than
   * either of the length limits, code points are cut off from the end
   * of the text and replaced with an ellipsis. We keep as much of the
   * leading text as possible, subject to these constraints:
   *
   * - There are no more than `MaxDisplayLength` characters.
   *
   * - When converted to UTF-8, the result does not exceed `MaxByteLength`.
   *
   * - We do not break within a single grapheme cluster.
   *
   * The input is assumed to be partial HTML-encoded and may or may
   * not be NFC normalized. The result will be partial HTML-encoded
   * and will be NFC normalized.
   */
  def truncateForRetweet(input: String): String = truncateWithEllipsis(input, Ellipsis)
  /**
   * Truncate to [[com.twitter.tweetypie.tweettext.TweetText#OrginalMaxDisplayLength]] display
   * units, using "..." as an ellipsis. The resulting text is guaranteed to pass our tweet length
   * check, but it is not guaranteed to fit in a SMS message.
   */
  def truncateForSms(input: String): String = truncateWithEllipsis(input, "...")
  /**
   * Check the length of the given text, and truncate it if it is longer
   * than the allowed length for a Tweet. The result of this method will
   * always have:
   *
   * - Display length <= OriginalMaxDisplayLength.
   * - Length when encoded as UTF-8 <= OriginalMaxUtf8Length.
   *
   * If the input would violate this, then the text will be
   * truncated. When the text is truncated, it will be truncated such
   * that:
   *
   * - Grapheme clusters will not be split.
   * - The last character before the ellipsis will not be a whitespace
   *   character.
   * - The ellipsis text will be appended to the end.
   */
  private[this] def truncateWithEllipsis(input: String, ellipsis: String): String = {
    val text = nfcNormalize(input)
    val truncateAt =
      truncationPoint(text, OriginalMaxDisplayLength, OriginalMaxUtf8Length, Some(ellipsis))
    if (truncateAt.codeUnitOffset.toInt == text.length) text
    else text.take(truncateAt.codeUnitOffset.toInt) + ellipsis
  }
  /**
   * Indicates a potential TruncationPoint in piece of text.
   *
   * @param charOffset the utf-16 character offset of the truncation point
   * @param codePointOffset the offset in code points
   */
  case class TruncationPoint(codeUnitOffset: Offset.CodeUnit, codePointOffset: Offset.CodePoint)
  /**
   * Computes a TruncationPoint for the given text and length constraints.  If `truncated` on
   * the result is `false`, it means the text will fit within the given constraints without
   * truncation.  Otherwise, the result indicates both the character and code-point offsets
   * at which to perform the truncation, and the resulting display length and byte length of
   * the truncated string.
   *
   * Text should be NFC normalized first for best results.
   *
   * @param withEllipsis if true, then the truncation point will be computed so that there is space
   * to append an ellipsis and to still remain within the limits.  The ellipsis is not counted
   * in the returned display and byte lengths.
   *
   * @param atomicUnits may contain a list of ranges that should be treated as atomic unit and
   * not split.  each tuple is half-open range in code points.
   */
  def truncationPoint(
    text: String,
    maxDisplayLength: Int = OriginalMaxDisplayLength,
    maxByteLength: Int = OriginalMaxUtf8Length,
    withEllipsis: Option[String] = None,
    atomicUnits: Offset.Ranges[Offset.CodePoint] = Offset.Ranges.Empty
  ): TruncationPoint = {
    val breakPoints =
      GraphemeIndexIterator
        .ends(text)
        .filterNot(Offset.Ranges.htmlEntities(text).contains)
    val ellipsisDisplayUnits =
      withEllipsis.map(Offset.DisplayUnit.length).getOrElse(Offset.DisplayUnit(0))
    val maxTruncatedDisplayLength = Offset.DisplayUnit(maxDisplayLength) - ellipsisDisplayUnits
    val ellipsisByteLength = withEllipsis.map(Offset.Utf8.length).getOrElse(Offset.Utf8(0))
    val maxTruncatedByteLength = Offset.Utf8(maxByteLength) - ellipsisByteLength
    var codeUnit = Offset.CodeUnit(0)
    var codePoint = Offset.CodePoint(0)
    var displayLength = Offset.DisplayUnit(0)
    var byteLength = Offset.Utf8(0)
    var truncateCodeUnit = codeUnit
    var truncateCodePoint = codePoint
    @tailrec def go(): TruncationPoint =
      if (displayLength.toInt > maxDisplayLength || byteLength.toInt > maxByteLength) {
        TruncationPoint(truncateCodeUnit, truncateCodePoint)
      } else if (codeUnit != truncateCodeUnit &&
        displayLength <= maxTruncatedDisplayLength &&
        byteLength <= maxTruncatedByteLength &&
        (codeUnit.toInt == 0 || !Character.isWhitespace(text.codePointBefore(codeUnit.toInt))) &&
        !atomicUnits.contains(codePoint)) {
        // we can advance the truncation point
        truncateCodeUnit = codeUnit
        truncateCodePoint = codePoint
        go()
      } else if (breakPoints.hasNext) {
        // there are further truncation points to consider
        val nextCodeUnit = breakPoints.next
        codePoint += Offset.CodePoint.count(text, codeUnit, nextCodeUnit)
        displayLength += Offset.DisplayUnit.count(text, codeUnit, nextCodeUnit)
        byteLength += Offset.Utf8.count(text, codeUnit, nextCodeUnit)
        codeUnit = nextCodeUnit
        go()
      } else {
        TruncationPoint(codeUnit, codePoint)
      }
    go()
  }
  /**
   * Truncate the given text, avoiding chopping HTML entities and tweet
   * entities. This should only be used for testing because it performs
   * entity extraction, and so is very inefficient.
   */
  def truncateForTests(
    input: String,
    maxDisplayLength: Int = OriginalMaxDisplayLength,
    maxByteLength: Int = OriginalMaxUtf8Length
  ): String = {
    val text = nfcNormalize(input)
    val extractor = new Extractor
    val entities = extractor.extractEntitiesWithIndices(text)
    extractor.modifyIndicesFromUTF16ToUnicode(text, entities)
    val avoid = Offset.Ranges.fromCodePointPairs(
      entities.asScala.map(e => (e.getStart().intValue, e.getEnd().intValue))
    )
    val truncateAt = truncationPoint(text, maxDisplayLength, maxByteLength, None, avoid)
    text.take(truncateAt.codeUnitOffset.toInt)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala
@ -0,0 +1,62 @@
 package com.twitter.tweetypie.tweettext
 import java.text.Normalizer
 object TweetText {
  /** The original maximum tweet length, taking into account normalization */
  private[tweetypie] val OriginalMaxDisplayLength = 140
  /** Maximum number of visible code points allowed in a tweet when tweet length is counted by code
   * points, taking into account normalization. See also [[MaxVisibleWeightedEmojiLength]].
   */
  private[tweetypie] val MaxVisibleWeightedLength = 280
  /** Maximum number of visible code points allowed in a tweet when tweet length is counted by
   * emoji, taking into account normalization. See also [[MaxVisibleWeightedLength]].
   * 140 is the max number of Emojis, visible, fully-weighted per Twitter's cramming rules
   * 10 is the max number of Code Points per Emoji
   */
  private[tweetypie] val MaxVisibleWeightedEmojiLength = 140 * 10
  /** Maximum number of bytes when truncating tweet text for a retweet.  Originally was the
   * max UTF-8 length when tweets were at most 140 characters.
   * See also [[OriginalMaxDisplayLength]].
   */
  private[tweetypie] val OriginalMaxUtf8Length = 600
  /** Maximum number of bytes for tweet text using utf-8 encoding.
   */
  private[tweetypie] val MaxUtf8Length = 5708
  /** Maximum number of mentions allowed in tweet text.  This is enforced at tweet creation time */
  private[tweetypie] val MaxMentions = 50
  /** Maximum number of urls allowed in tweet text.  This is enforced at tweet creation time */
  private[tweetypie] val MaxUrls = 10
  /** Maximum number of hashtags allowed in tweet text.  This is enforced at tweet creation time */
  private[tweetypie] val MaxHashtags = 50
  /** Maximum number of cashtags allowed in tweet text.  This is enforced at tweet creation time */
  private[tweetypie] val MaxCashtags = 50
  /** Maximum length of a hashtag (not including the '#') */
  private[tweetypie] val MaxHashtagLength = 100
  /**
   * Normalizes the text according to the unicode NFC spec.
   */
  def nfcNormalize(text: String): String = Normalizer.normalize(text, Normalizer.Form.NFC)
  /**
   * Return the number of "characters" in this text. See
   * [[Offset.DisplayUnit]].
   */
  def displayLength(text: String): Int = Offset.DisplayUnit.length(text).toInt
  /**
   * Return the number of Unicode code points in this String.
   */
  def codePointLength(text: String): Int = Offset.CodePoint.length(text).toInt
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD
@ -0,0 +1,76 @@
 scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    provides = scala_artifact(
        org = "com.twitter.tweetypie",
        name = "util",
        repo = artifactory,
    ),
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "//:scala-reflect",
        "3rdparty/jvm/commons-codec",
        "3rdparty/jvm/org/apache/thrift:libthrift",
        "finagle/finagle-core/src/main",
        "mediaservices/commons/src/main/thrift:thrift-scala",
        "scrooge/scrooge-serializer/src/main/scala",
        "tweetypie/servo/repo",
        "tweetypie/servo/util",
        "tweetypie/servo/util/src/main/scala:exception",
        "src/scala/com/twitter/takedown/util",
        "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala",
        "src/thrift/com/twitter/escherbird:media-annotation-structs-scala",
        "src/thrift/com/twitter/expandodo:cards-scala",
        "src/thrift/com/twitter/gizmoduck:thrift-scala",
        "src/thrift/com/twitter/servo:servo-exception-scala",
        "src/thrift/com/twitter/spam/rtf:safety-label-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:deprecated-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:transient_context-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
        "stitch/stitch-core",
        "tweet-util",
        "util/util-core:scala",
    ],
 )
 scala_library(
    name = "EditControlUtil",
    sources = [
        "EditControlUtil.scala",
        "package.scala",
    ],
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    provides = scala_artifact(
        org = "com.twitter.tweetypie",
        name = "util-EditControlUtil",
        repo = artifactory,
    ),
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "//:scala-reflect",
        "3rdparty/jvm/commons-codec",
        "3rdparty/jvm/org/apache/thrift:libthrift",
        "finagle/finagle-core/src/main",
        "mediaservices/commons/src/main/thrift:thrift-scala",
        "scrooge/scrooge-serializer/src/main/scala",
        "tweetypie/servo/util/src/main/scala:exception",
        "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala",
        "src/thrift/com/twitter/escherbird:media-annotation-structs-scala",
        "src/thrift/com/twitter/expandodo:cards-scala",
        "src/thrift/com/twitter/gizmoduck:thrift-scala",
        "src/thrift/com/twitter/servo:servo-exception-scala",
        "src/thrift/com/twitter/spam/rtf:safety-label-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:deprecated-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:transient_context-scala",
        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
        "stitch/stitch-core",
        "tweet-util",
        "util/util-core:scala",
    ],
 )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala
@ -0,0 +1,29 @@
 package com.twitter.tweetypie.util
 import com.twitter.escherbird.thriftscala.TweetEntityAnnotation
 import com.twitter.tweetypie.thriftscala.EscherbirdEntityAnnotations
 import com.twitter.tweetypie.thriftscala.Tweet
 object CommunityAnnotation {
  val groupId: Long = 8
  val domainId: Long = 31
  def apply(communityId: Long): TweetEntityAnnotation =
    TweetEntityAnnotation(groupId, domainId, entityId = communityId)
  def unapply(annotation: TweetEntityAnnotation): Option[Long] =
    annotation match {
      case TweetEntityAnnotation(`groupId`, `domainId`, entityId) => Some(entityId)
      case _ => None
    }
  // Returns None instead of Some(Seq()) when there are non-community annotations present
  def additionalFieldsToCommunityIDs(additionalFields: Tweet): Option[Seq[Long]] = {
    additionalFields.escherbirdEntityAnnotations
      .map {
        case EscherbirdEntityAnnotations(entityAnnotations) =>
          entityAnnotations.flatMap(CommunityAnnotation.unapply)
      }.filter(_.nonEmpty)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala
@ -0,0 +1,19 @@
 package com.twitter.tweetypie.util
 import com.twitter.tweetypie.thriftscala.Communities
 object CommunityUtil {
  def communityIds(maybeCommunities: Option[Communities]): Seq[Long] = {
    maybeCommunities match {
      case None =>
        Nil
      case Some(Communities(seq)) =>
        seq
    }
  }
  def hasCommunity(maybeCommunities: Option[Communities]): Boolean = {
    maybeCommunities.exists(_.communityIds.nonEmpty)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala
@ -0,0 +1,112 @@
 package com.twitter.tweetypie
 package util
 import com.twitter.tweetypie.thriftscala._
 object ConversationControls {
  object Create {
    def byInvitation(
      inviteViaMention: Option[Boolean] = None
    ): TweetCreateConversationControl.ByInvitation = TweetCreateConversationControl.ByInvitation(
      TweetCreateConversationControlByInvitation(inviteViaMention = inviteViaMention)
    )
    def community(
      inviteViaMention: Option[Boolean] = None
    ): TweetCreateConversationControl.Community = TweetCreateConversationControl.Community(
      TweetCreateConversationControlCommunity(inviteViaMention = inviteViaMention)
    )
    def followers(
      inviteViaMention: Option[Boolean] = None
    ): TweetCreateConversationControl.Followers = TweetCreateConversationControl.Followers(
      TweetCreateConversationControlFollowers(inviteViaMention = inviteViaMention)
    )
  }
  object Scenario {
    case class CommonScenario(
      createConversationControl: TweetCreateConversationControl,
      descriptionSuffix: String,
      expectedConversationControl: (UserId, Seq[UserId]) => ConversationControl,
      inviteViaMention: Option[Boolean])
    def mkCommunityScenario(inviteViaMention: Option[Boolean]): CommonScenario =
      CommonScenario(
        Create.community(inviteViaMention = inviteViaMention),
        "community",
        expectedConversationControl = (authorId, userIds) => {
          community(userIds, authorId, inviteViaMention)
        },
        inviteViaMention
      )
    def mkByInvitationScenario(inviteViaMention: Option[Boolean]): CommonScenario =
      CommonScenario(
        Create.byInvitation(inviteViaMention = inviteViaMention),
        "invited users",
        expectedConversationControl = (authorId, userIds) => {
          byInvitation(userIds, authorId, inviteViaMention)
        },
        inviteViaMention
      )
    def mkFollowersScenario(inviteViaMention: Option[Boolean]): CommonScenario =
      CommonScenario(
        Create.followers(inviteViaMention = inviteViaMention),
        "followers",
        expectedConversationControl = (authorId, userIds) => {
          followers(userIds, authorId, inviteViaMention)
        },
        inviteViaMention
      )
    val communityScenario = mkCommunityScenario(None)
    val communityInviteViaMentionScenario = mkCommunityScenario(Some(true))
    val byInvitationScenario = mkByInvitationScenario(None)
    val byInvitationInviteViaMentionScenario = mkByInvitationScenario(Some(true))
    val followersScenario = mkFollowersScenario(None)
    val followersInviteViaMentionScenario = mkFollowersScenario(Some(true))
  }
  def byInvitation(
    invitedUserIds: Seq[UserId],
    conversationTweetAuthorId: UserId,
    inviteViaMention: Option[Boolean] = None
  ): ConversationControl =
    ConversationControl.ByInvitation(
      ConversationControlByInvitation(
        conversationTweetAuthorId = conversationTweetAuthorId,
        invitedUserIds = invitedUserIds,
        inviteViaMention = inviteViaMention
      )
    )
  def community(
    invitedUserIds: Seq[UserId],
    conversationTweetAuthorId: UserId,
    inviteViaMention: Option[Boolean] = None
  ): ConversationControl =
    ConversationControl.Community(
      ConversationControlCommunity(
        conversationTweetAuthorId = conversationTweetAuthorId,
        invitedUserIds = invitedUserIds,
        inviteViaMention = inviteViaMention
      )
    )
  def followers(
    invitedUserIds: Seq[UserId],
    conversationTweetAuthorId: UserId,
    inviteViaMention: Option[Boolean] = None
  ): ConversationControl =
    ConversationControl.Followers(
      ConversationControlFollowers(
        conversationTweetAuthorId = conversationTweetAuthorId,
        invitedUserIds = invitedUserIds,
        inviteViaMention = inviteViaMention
      )
    )
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala
@ -0,0 +1,174 @@
 package com.twitter.tweetypie.util
 import com.twitter.servo.util.Gate
 import com.twitter.tweetypie.util.TweetEditFailure.TweetEditInvalidEditControlException
 import com.twitter.tweetypie.util.TweetEditFailure.TweetEditUpdateEditControlException
 import com.twitter.tweetypie.thriftscala.EditControl
 import com.twitter.tweetypie.thriftscala.EditControlEdit
 import com.twitter.tweetypie.thriftscala.EditControlInitial
 import com.twitter.tweetypie.thriftscala.Tweet
 import com.twitter.util.Try
 import com.twitter.util.Return
 import com.twitter.util.Throw
 import com.twitter.util.Time
 import com.twitter.util.Duration
 object EditControlUtil {
  val maxTweetEditsAllowed = 5
  val oldEditTimeWindow = Duration.fromMinutes(30)
  val editTimeWindow = Duration.fromMinutes(60)
  def editControlEdit(
    initialTweetId: TweetId,
    editControlInitial: Option[EditControlInitial] = None
  ): EditControl.Edit =
    EditControl.Edit(
      EditControlEdit(initialTweetId = initialTweetId, editControlInitial = editControlInitial))
  // EditControl for the tweet that is not an edit, that is, any regular tweet we create
  // that can, potentially, be edited later.
  def makeEditControlInitial(
    tweetId: TweetId,
    createdAt: Time,
    setEditWindowToSixtyMinutes: Gate[Unit] = Gate(_ => false)
  ): EditControl.Initial = {
    val editWindow = if (setEditWindowToSixtyMinutes()) editTimeWindow else oldEditTimeWindow
    val initial = EditControlInitial(
      editTweetIds = Seq(tweetId),
      editableUntilMsecs = Some(createdAt.plus(editWindow).inMilliseconds),
      editsRemaining = Some(maxTweetEditsAllowed),
      isEditEligible = defaultIsEditEligible,
    )
    EditControl.Initial(initial)
  }
  // Returns if a given latestTweetId is the latest edit in the EditControl
  def isLatestEdit(
    tweetEditControl: Option[EditControl],
    latestTweetId: TweetId
  ): Try[Boolean] = {
    tweetEditControl match {
      case Some(EditControl.Initial(initial)) =>
        isLatestEditFromEditControlInitial(Some(initial), latestTweetId)
      case Some(EditControl.Edit(edit)) =>
        isLatestEditFromEditControlInitial(
          edit.editControlInitial,
          latestTweetId
        )
      case _ => Throw(TweetEditInvalidEditControlException)
    }
  }
  // Returns if a given latestTweetId is the latest edit in the EditControlInitial
  private def isLatestEditFromEditControlInitial(
    initialTweetEditControl: Option[EditControlInitial],
    latestTweetId: TweetId
  ): Try[Boolean] = {
    initialTweetEditControl match {
      case Some(initial) =>
        Return(latestTweetId == initial.editTweetIds.last)
      case _ => Throw(TweetEditInvalidEditControlException)
    }
  }
  /* Create an updated edit control for an initialTweet given the id of the new edit */
  def editControlForInitialTweet(
    initialTweet: Tweet,
    newEditId: TweetId
  ): Try[EditControl.Initial] = {
    initialTweet.editControl match {
      case Some(EditControl.Initial(initial)) =>
        Return(EditControl.Initial(plusEdit(initial, newEditId)))
      case Some(EditControl.Edit(_)) => Throw(TweetEditUpdateEditControlException)
      case _ =>
        initialTweet.coreData match {
          case Some(coreData) =>
            Return(
              makeEditControlInitial(
                tweetId = initialTweet.id,
                createdAt = Time.fromMilliseconds(coreData.createdAtSecs * 1000),
                setEditWindowToSixtyMinutes = Gate(_ => true)
              )
            )
          case None => Throw(new Exception("Tweet Missing Required CoreData"))
        }
    }
  }
  def updateEditControl(tweet: Tweet, newEditId: TweetId): Try[Tweet] =
    editControlForInitialTweet(tweet, newEditId).map { editControl =>
      tweet.copy(editControl = Some(editControl))
    }
  def plusEdit(initial: EditControlInitial, newEditId: TweetId): EditControlInitial = {
    val newEditTweetIds = (initial.editTweetIds :+ newEditId).distinct.sorted
    val editsCount = newEditTweetIds.size - 1 // as there is the original tweet ID there too.
    initial.copy(
      editTweetIds = newEditTweetIds,
      editsRemaining = Some(maxTweetEditsAllowed - editsCount),
    )
  }
  // The ID of the initial Tweet if this is an edit
  def getInitialTweetIdIfEdit(tweet: Tweet): Option[TweetId] = tweet.editControl match {
    case Some(EditControl.Edit(edit)) => Some(edit.initialTweetId)
    case _ => None
  }
  // If this is the first tweet in an edit chain, return the same tweet id
  // otherwise return the result of getInitialTweetId
  def getInitialTweetId(tweet: Tweet): TweetId =
    getInitialTweetIdIfEdit(tweet).getOrElse(tweet.id)
  def isInitialTweet(tweet: Tweet): Boolean =
    getInitialTweetId(tweet) == tweet.id
  // Extracted just so that we can easily track where the values of isEditEligible is coming from.
  private def defaultIsEditEligible: Option[Boolean] = Some(true)
  // returns true if it's an edit of a Tweet or an initial Tweet that's been edited
  def isEditTweet(tweet: Tweet): Boolean =
    tweet.editControl match {
      case Some(eci: EditControl.Initial) if eci.initial.editTweetIds.size <= 1 => false
      case Some(_: EditControl.Initial) | Some(_: EditControl.Edit) | Some(
            EditControl.UnknownUnionField(_)) =>
        true
      case None => false
    }
  // returns true if editControl is from an edit of a Tweet
  // returns false for any other state, including edit intial.
  def isEditControlEdit(editControl: EditControl): Boolean = {
    editControl match {
      case _: EditControl.Edit | EditControl.UnknownUnionField(_) => true
      case _ => false
    }
  }
  def getEditTweetIds(editControl: Option[EditControl]): Try[Seq[TweetId]] = {
    editControl match {
      case Some(EditControl.Edit(EditControlEdit(_, Some(eci)))) =>
        Return(eci.editTweetIds)
      case Some(EditControl.Initial(initial)) =>
        Return(initial.editTweetIds)
      case _ =>
        Throw(new Exception(s"EditControlInitial not found in $editControl"))
    }
  }
 }
 object TweetEditFailure {
  abstract class TweetEditException(msg: String) extends Exception(msg)
  case object TweetEditGetInitialEditControlException
      extends TweetEditException("Initial EditControl not found")
  case object TweetEditInvalidEditControlException
      extends TweetEditException("Invalid EditControl for initial_tweet")
  case object TweetEditUpdateEditControlException
      extends TweetEditException("Invalid Edit Control Update")
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala
@ -0,0 +1,45 @@
 package com.twitter.tweetypie.util
 import com.twitter.finagle.Backoff
 import com.twitter.finagle.service.RetryPolicy
 import com.twitter.finagle.service.RetryPolicy.RetryableWriteException
 import com.twitter.servo.exception.thriftscala.ServerError
 import com.twitter.util.Duration
 import com.twitter.util.Throw
 import com.twitter.util.TimeoutException
 import com.twitter.util.Try
 object RetryPolicyBuilder {
  /**
   * Retry on any exception.
   */
  def anyFailure[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] =
    RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) {
      case Throw(_) => true
    }
  /**
   * Retry on com.twitter.util.TimeoutException
   */
  def timeouts[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] =
    RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) {
      case Throw(_: TimeoutException) => true
    }
  /**
   * Retry on com.twitter.finagle.service.RetryableWriteExceptions
   */
  def writes[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] =
    RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) {
      case Throw(RetryableWriteException(_)) => true
    }
  /**
   * Retry on com.twitter.servo.exception.thriftscala.ServerError
   */
  def servoServerError[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] =
    RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) {
      case Throw(ServerError(_)) => true
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala
@ -0,0 +1,54 @@
 package com.twitter.tweetypie.util
 import com.twitter.finagle.stats.Stat
 import com.twitter.finagle.stats.StatsReceiver
 import com.twitter.servo
 import com.twitter.util.Return
 import com.twitter.util.Throw
 import com.twitter.stitch.Stitch
 object StitchUtils {
  def trackLatency[T](latencyStat: Stat, s: => Stitch[T]): Stitch[T] = {
    Stitch
      .time(s)
      .map {
        case (res, duration) =>
          latencyStat.add(duration.inMillis)
          res
      }
      .lowerFromTry
  }
  def observe[T](statsReceiver: StatsReceiver, apiName: String): Stitch[T] => Stitch[T] = {
    val stats = statsReceiver.scope(apiName)
    val requests = stats.counter("requests")
    val success = stats.counter("success")
    val latencyStat = stats.stat("latency_ms")
    val exceptionCounter =
      new servo.util.ExceptionCounter(stats, "failures")
    stitch =>
      trackLatency(latencyStat, stitch)
        .respond {
          case Return(_) =>
            requests.incr()
            success.incr()
          case Throw(e) =>
            exceptionCounter(e)
            requests.incr()
        }
  }
  def translateExceptions[T](
    stitch: Stitch[T],
    translateException: PartialFunction[Throwable, Throwable]
  ): Stitch[T] =
    stitch.rescue {
      case t if translateException.isDefinedAt(t) =>
        Stitch.exception(translateException(t))
      case t => Stitch.exception(t)
    }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala
@ -0,0 +1,31 @@
 package com.twitter.tweetypie.util
 /**
 * Escape a String into Java or Scala String literal syntax (adds the
 * surrounding quotes.)
 *
 * This is primarily for printing Strings for debugging or logging.
 */
 object StringLiteral extends (String => String) {
  private[this] val ControlLimit = ' '
  private[this] val PrintableLimit = '\u007e'
  private[this] val Specials =
    Map('\n' -> 'n', '\r' -> 'r', '\t' -> 't', '"' -> '"', '\'' -> '\'', '\\' -> '\\')
  def apply(str: String): String = {
    val s = new StringBuilder(str.length)
    s.append('"')
    var i = 0
    while (i < str.length) {
      val c = str(i)
      Specials.get(c) match {
        case None =>
          if (c >= ControlLimit && c <= PrintableLimit) s.append(c)
          else s.append("\\u%04x".format(c.toInt))
        case Some(special) => s.append('\\').append(special)
      }
      i += 1
    }
    s.append('"').result
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala
@ -0,0 +1,49 @@
 package com.twitter.tweetypie.util
 import com.twitter.takedown.util.TakedownReasons
 import com.twitter.takedown.util.TakedownReasons.CountryCode
 import com.twitter.tseng.withholding.thriftscala.TakedownReason
 import com.twitter.tseng.withholding.thriftscala.UnspecifiedReason
 import com.twitter.tweetypie.thriftscala.Tweet
 /**
 * Contains tweetypie-specific utils for working with TakedownReasons.
 */
 object Takedowns {
  type CountryCode = String
  /**
   * Take a list of [[TakedownReason]] and return values to be saved on the [[Tweet]] in fields
   * tweetypieOnlyTakedownCountryCode and tweetypieOnlyTakedownReason.
   *
   * - tweetypieOnlyTakedownCountryCode contains the country_code of all UnspecifiedReasons
   * - tweetypieOnlyTakedownReason contains all other reasons
   */
  def partitionReasons(reasons: Seq[TakedownReason]): (Seq[String], Seq[TakedownReason]) = {
    val (unspecifiedReasons, specifiedReasons) = reasons.partition {
      case TakedownReason.UnspecifiedReason(UnspecifiedReason(_)) => true
      case _ => false
    }
    val unspecifiedCountryCodes = unspecifiedReasons.collect(TakedownReasons.reasonToCountryCode)
    (unspecifiedCountryCodes, specifiedReasons)
  }
  def fromTweet(t: Tweet): Takedowns =
    Takedowns(
      Seq
        .concat(
          t.tweetypieOnlyTakedownCountryCodes
            .getOrElse(Nil).map(TakedownReasons.countryCodeToReason),
          t.tweetypieOnlyTakedownReasons.getOrElse(Nil)
        ).toSet
    )
 }
 /**
 * This class is used to ensure the caller has access to both the full list of reasons as well
 * as the backwards-compatible list of country codes.
 */
 case class Takedowns(reasons: Set[TakedownReason]) {
  def countryCodes: Set[CountryCode] = reasons.collect(TakedownReasons.reasonToCountryCode)
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala
@ -0,0 +1,17 @@
 package com.twitter.tweetypie.util
 import com.twitter.tweetypie.thriftscala.TransientCreateContext
 import com.twitter.tweetypie.thriftscala.TweetCreateContextKey
 import com.twitter.tweetypie.thriftscala.TweetCreateContextKey.PeriscopeCreatorId
 import com.twitter.tweetypie.thriftscala.TweetCreateContextKey.PeriscopeIsLive
 object TransientContextUtil {
  def toAdditionalContext(context: TransientCreateContext): Map[TweetCreateContextKey, String] =
    Seq
      .concat(
        context.periscopeIsLive.map(PeriscopeIsLive -> _.toString), // "true" or "false"
        context.periscopeCreatorId.map(PeriscopeCreatorId -> _.toString) // userId
      )
      .toMap
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala
@ -0,0 +1,203 @@
 package com.twitter.tweetypie.util
 import com.twitter.conversions.DurationOps._
 import com.twitter.logging.Logger
 import com.twitter.mediaservices.commons.mediainformation.thriftscala.UserDefinedProductMetadata
 import com.twitter.scrooge.BinaryThriftStructSerializer
 import com.twitter.servo.cache.ScopedCacheKey
 import com.twitter.servo.util.Transformer
 import com.twitter.tweetypie.thriftscala.PostTweetRequest
 import com.twitter.util.Base64Long
 import com.twitter.util.Time
 import java.nio.ByteBuffer
 import java.security.MessageDigest
 import org.apache.commons.codec.binary.Base64
 import scala.collection.immutable.SortedMap
 object TweetCreationLock {
  case class Key private (userId: UserId, typeCode: String, idOrMd5: String)
      extends ScopedCacheKey("t", "locker", 2, Base64Long.toBase64(userId), typeCode, idOrMd5) {
    def uniquenessId: Option[String] =
      if (typeCode == Key.TypeCode.UniquenessId) Some(idOrMd5) else None
  }
  object Key {
    private[this] val log = Logger(getClass)
    object TypeCode {
      val SourceTweetId = "r"
      val UniquenessId = "u"
      val PostTweetRequest = "p"
    }
    private[this] val serializer = BinaryThriftStructSerializer(PostTweetRequest)
    // normalize the representation of no media ids.
    private[util] def sanitizeMediaUploadIds(mediaUploadIds: Option[Seq[Long]]) =
      mediaUploadIds.filter(_.nonEmpty)
    /**
     * Request deduplication depends on the hash of a serialized Thrift value.
     *
     * In order to guarantee that a Map has a reproducible serialized form,
     * it's necessary to fix the ordering of its keys.
     */
    private[util] def sanitizeMediaMetadata(
      mediaMetadata: Option[scala.collection.Map[MediaId, UserDefinedProductMetadata]]
    ): Option[scala.collection.Map[MediaId, UserDefinedProductMetadata]] =
      mediaMetadata.map(m => SortedMap(m.toSeq: _*))
    /**
     *  Make sure to sanitize request fields with map/set since serialized
     *  bytes ordering is not guaranteed for same thrift values.
     */
    private[util] def sanitizeRequest(request: PostTweetRequest): PostTweetRequest =
      PostTweetRequest(
        userId = request.userId,
        text = request.text,
        createdVia = "",
        inReplyToTweetId = request.inReplyToTweetId,
        geo = request.geo,
        mediaUploadIds = sanitizeMediaUploadIds(request.mediaUploadIds),
        narrowcast = request.narrowcast,
        nullcast = request.nullcast,
        additionalFields = request.additionalFields,
        attachmentUrl = request.attachmentUrl,
        mediaMetadata = sanitizeMediaMetadata(request.mediaMetadata),
        conversationControl = request.conversationControl,
        underlyingCreativesContainerId = request.underlyingCreativesContainerId,
        editOptions = request.editOptions,
        noteTweetOptions = request.noteTweetOptions
      )
    def bySourceTweetId(userId: UserId, sourceTweetId: TweetId): Key =
      Key(userId, TypeCode.SourceTweetId, Base64Long.toBase64(sourceTweetId))
    def byRequest(request: PostTweetRequest): Key =
      request.uniquenessId match {
        case Some(uqid) =>
          byUniquenessId(request.userId, uqid)
        case None =>
          val sanitized = sanitizeRequest(request)
          val sanitizedBytes = serializer.toBytes(sanitized)
          val digested = MessageDigest.getInstance("SHA-256").digest(sanitizedBytes)
          val base64Digest = Base64.encodeBase64String(digested)
          val key = Key(request.userId, TypeCode.PostTweetRequest, base64Digest)
          log.ifDebug(s"Generated key $key from request:\n${sanitized}")
          key
      }
    /**
     * Key for tweets that have a uniqueness id set. There is only one
     * namespace of uniqueness ids, across all clients. They are
     * expected to be Snowflake ids, in order to avoid cache
     * collisions.
     */
    def byUniquenessId(userId: UserId, uniquenessId: Long): Key =
      Key(userId, TypeCode.UniquenessId, Base64Long.toBase64(uniquenessId))
  }
  /**
   * The state of tweet creation for a given Key (request).
   */
  sealed trait State
  object State {
    /**
     * There is no tweet creation currently in progress. (This can
     * either be represented by no entry in the cache, or this special
     * marker. This lets us use checkAndSet for deletion to avoid
     * accidentally overwriting other process' values.)
     */
    case object Unlocked extends State
    /**
     * Some process is attempting to create the tweet.
     */
    case class InProgress(token: Long, timestamp: Time) extends State
    /**
     * The tweet has already been successfully created, and has the
     * specified id.
     */
    case class AlreadyCreated(tweetId: TweetId, timestamp: Time) extends State
    /**
     * When stored in cache, each state is prefixed by a byte
     * indicating the type of the entry.
     */
    object TypeCode {
      val Unlocked: Byte = 0.toByte
      val InProgress: Byte = 1.toByte // + random long + timestamp
      val AlreadyCreated: Byte = 2.toByte // + tweet id + timestamp
    }
    private[this] val BufferSize = 17 // type byte + 64-bit value + 64-bit timestamp
    // Constant buffer to use for storing the serialized form on
    // Unlocked.
    private[this] val UnlockedBuf = Array[Byte](TypeCode.Unlocked)
    // Store the serialization function in a ThreadLocal so that we can
    // reuse the buffer between invocations.
    private[this] val threadLocalSerialize = new ThreadLocal[State => Array[Byte]] {
      override def initialValue(): State => Array[Byte] = {
        // Allocate the thread-local state
        val ary = new Array[Byte](BufferSize)
        val buf = ByteBuffer.wrap(ary)
        {
          case Unlocked => UnlockedBuf
          case InProgress(token, timestamp) =>
            buf.clear()
            buf
              .put(TypeCode.InProgress)
              .putLong(token)
              .putLong(timestamp.sinceEpoch.inNanoseconds)
            ary
          case AlreadyCreated(tweetId, timestamp) =>
            buf.clear()
            buf
              .put(TypeCode.AlreadyCreated)
              .putLong(tweetId)
              .putLong(timestamp.sinceEpoch.inNanoseconds)
            ary
        }
      }
    }
    /**
     * Convert this State to the cache representation.
     */
    private[this] def toBytes(state: State): Array[Byte] =
      threadLocalSerialize.get()(state)
    /**
     * Convert this byte array into a LockState.
     *
     * @throws RuntimeException if the buffer is not of the right size
     *   and format
     */
    private[this] def fromBytes(bytes: Array[Byte]): State = {
      val buf = ByteBuffer.wrap(bytes)
      val result = buf.get() match {
        case TypeCode.Unlocked => Unlocked
        case TypeCode.InProgress => InProgress(buf.getLong(), buf.getLong().nanoseconds.afterEpoch)
        case TypeCode.AlreadyCreated =>
          AlreadyCreated(buf.getLong(), buf.getLong().nanoseconds.afterEpoch)
        case other => throw new RuntimeException("Invalid type code: " + other)
      }
      if (buf.remaining != 0) {
        throw new RuntimeException("Extra data in buffer: " + bytes)
      }
      result
    }
    /**
     * How to serialize the State for storage in cache.
     */
    val Serializer: Transformer[State, Array[Byte]] =
      Transformer[State, Array[Byte]](tTo = toBytes _, tFrom = fromBytes _)
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala
@ -0,0 +1,506 @@
 package com.twitter.tweetypie.util
 import com.twitter.dataproducts.enrichments.thriftscala.ProfileGeoEnrichment
 import com.twitter.expandodo.thriftscala._
 import com.twitter.mediaservices.commons.thriftscala.MediaKey
 import com.twitter.mediaservices.commons.tweetmedia.thriftscala._
 import com.twitter.servo.data.Lens
 import com.twitter.spam.rtf.thriftscala.SafetyLabel
 import com.twitter.tseng.withholding.thriftscala.TakedownReason
 import com.twitter.tweetypie.thriftscala._
 import com.twitter.tweetypie.unmentions.thriftscala.UnmentionData
 object TweetLenses {
  import Lens.checkEq
  def requireSome[A, B](l: Lens[A, Option[B]]): Lens[A, B] =
    checkEq[A, B](
      a => l.get(a).get,
      (a, b) => l.set(a, Some(b))
    )
  def tweetLens[A](get: Tweet => A, set: (Tweet, A) => Tweet): Lens[Tweet, A] =
    checkEq[Tweet, A](get, set)
  val id: Lens[Tweet, TweetId] =
    tweetLens[TweetId](_.id, (t, id) => t.copy(id = id))
  val coreData: Lens[Tweet, Option[TweetCoreData]] =
    tweetLens[Option[TweetCoreData]](_.coreData, (t, coreData) => t.copy(coreData = coreData))
  val requiredCoreData: Lens[Tweet, TweetCoreData] =
    requireSome(coreData)
  val optUrls: Lens[Tweet, Option[Seq[UrlEntity]]] =
    tweetLens[Option[Seq[UrlEntity]]](_.urls, (t, urls) => t.copy(urls = urls))
  val urls: Lens[Tweet, Seq[UrlEntity]] =
    tweetLens[Seq[UrlEntity]](_.urls.toSeq.flatten, (t, urls) => t.copy(urls = Some(urls)))
  val optMentions: Lens[Tweet, Option[Seq[MentionEntity]]] =
    tweetLens[Option[Seq[MentionEntity]]](_.mentions, (t, v) => t.copy(mentions = v))
  val mentions: Lens[Tweet, Seq[MentionEntity]] =
    tweetLens[Seq[MentionEntity]](_.mentions.toSeq.flatten, (t, v) => t.copy(mentions = Some(v)))
  val unmentionData: Lens[Tweet, Option[UnmentionData]] =
    tweetLens[Option[UnmentionData]](_.unmentionData, (t, v) => t.copy(unmentionData = v))
  val optHashtags: Lens[Tweet, Option[Seq[HashtagEntity]]] =
    tweetLens[Option[Seq[HashtagEntity]]](_.hashtags, (t, v) => t.copy(hashtags = v))
  val hashtags: Lens[Tweet, Seq[HashtagEntity]] =
    tweetLens[Seq[HashtagEntity]](_.hashtags.toSeq.flatten, (t, v) => t.copy(hashtags = Some(v)))
  val optCashtags: Lens[Tweet, Option[Seq[CashtagEntity]]] =
    tweetLens[Option[Seq[CashtagEntity]]](_.cashtags, (t, v) => t.copy(cashtags = v))
  val cashtags: Lens[Tweet, Seq[CashtagEntity]] =
    tweetLens[Seq[CashtagEntity]](_.cashtags.toSeq.flatten, (t, v) => t.copy(cashtags = Some(v)))
  val optMedia: Lens[Tweet, Option[Seq[MediaEntity]]] =
    tweetLens[Option[Seq[MediaEntity]]](_.media, (t, v) => t.copy(media = v))
  val media: Lens[Tweet, Seq[MediaEntity]] =
    tweetLens[Seq[MediaEntity]](_.media.toSeq.flatten, (t, v) => t.copy(media = Some(v)))
  val mediaKeys: Lens[Tweet, Seq[MediaKey]] =
    tweetLens[Seq[MediaKey]](
      _.mediaKeys.toSeq.flatten,
      {
        case (t, v) => t.copy(mediaKeys = Some(v))
      })
  val place: Lens[Tweet, Option[Place]] =
    tweetLens[Option[Place]](
      _.place,
      {
        case (t, v) => t.copy(place = v)
      })
  val quotedTweet: Lens[Tweet, Option[QuotedTweet]] =
    tweetLens[Option[QuotedTweet]](
      _.quotedTweet,
      {
        case (t, v) => t.copy(quotedTweet = v)
      })
  val selfThreadMetadata: Lens[Tweet, Option[SelfThreadMetadata]] =
    tweetLens[Option[SelfThreadMetadata]](
      _.selfThreadMetadata,
      {
        case (t, v) => t.copy(selfThreadMetadata = v)
      })
  val composerSource: Lens[Tweet, Option[ComposerSource]] =
    tweetLens[Option[ComposerSource]](
      _.composerSource,
      {
        case (t, v) => t.copy(composerSource = v)
      })
  val deviceSource: Lens[Tweet, Option[DeviceSource]] =
    tweetLens[Option[DeviceSource]](
      _.deviceSource,
      {
        case (t, v) => t.copy(deviceSource = v)
      })
  val perspective: Lens[Tweet, Option[StatusPerspective]] =
    tweetLens[Option[StatusPerspective]](
      _.perspective,
      {
        case (t, v) => t.copy(perspective = v)
      })
  val cards: Lens[Tweet, Option[Seq[Card]]] =
    tweetLens[Option[Seq[Card]]](
      _.cards,
      {
        case (t, v) => t.copy(cards = v)
      })
  val card2: Lens[Tweet, Option[Card2]] =
    tweetLens[Option[Card2]](
      _.card2,
      {
        case (t, v) => t.copy(card2 = v)
      })
  val cardReference: Lens[Tweet, Option[CardReference]] =
    tweetLens[Option[CardReference]](
      _.cardReference,
      {
        case (t, v) => t.copy(cardReference = v)
      })
  val spamLabel: Lens[Tweet, Option[SafetyLabel]] =
    tweetLens[Option[SafetyLabel]](
      _.spamLabel,
      {
        case (t, v) => t.copy(spamLabel = v)
      })
  val lowQualityLabel: Lens[Tweet, Option[SafetyLabel]] =
    tweetLens[Option[SafetyLabel]](
      _.lowQualityLabel,
      {
        case (t, v) => t.copy(lowQualityLabel = v)
      })
  val nsfwHighPrecisionLabel: Lens[Tweet, Option[SafetyLabel]] =
    tweetLens[Option[SafetyLabel]](
      _.nsfwHighPrecisionLabel,
      {
        case (t, v) => t.copy(nsfwHighPrecisionLabel = v)
      })
  val bounceLabel: Lens[Tweet, Option[SafetyLabel]] =
    tweetLens[Option[SafetyLabel]](
      _.bounceLabel,
      {
        case (t, v) => t.copy(bounceLabel = v)
      })
  val takedownCountryCodes: Lens[Tweet, Option[Seq[String]]] =
    tweetLens[Option[Seq[String]]](
      _.takedownCountryCodes,
      {
        case (t, v) => t.copy(takedownCountryCodes = v)
      })
  val takedownReasons: Lens[Tweet, Option[Seq[TakedownReason]]] =
    tweetLens[Option[Seq[TakedownReason]]](
      _.takedownReasons,
      {
        case (t, v) => t.copy(takedownReasons = v)
      })
  val contributor: Lens[Tweet, Option[Contributor]] =
    tweetLens[Option[Contributor]](
      _.contributor,
      {
        case (t, v) => t.copy(contributor = v)
      })
  val mediaTags: Lens[Tweet, Option[TweetMediaTags]] =
    tweetLens[Option[TweetMediaTags]](
      _.mediaTags,
      {
        case (t, v) => t.copy(mediaTags = v)
      })
  val mediaTagMap: Lens[Tweet, Map[MediaId, Seq[MediaTag]]] =
    tweetLens[Map[MediaId, Seq[MediaTag]]](
      _.mediaTags.map { case TweetMediaTags(tagMap) => tagMap.toMap }.getOrElse(Map.empty),
      (t, v) => {
        val cleanMap = v.filter { case (_, tags) => tags.nonEmpty }
        t.copy(mediaTags = if (cleanMap.nonEmpty) Some(TweetMediaTags(cleanMap)) else None)
      }
    )
  val escherbirdEntityAnnotations: Lens[Tweet, Option[EscherbirdEntityAnnotations]] =
    tweetLens[Option[EscherbirdEntityAnnotations]](
      _.escherbirdEntityAnnotations,
      {
        case (t, v) => t.copy(escherbirdEntityAnnotations = v)
      })
  val communities: Lens[Tweet, Option[Communities]] =
    tweetLens[Option[Communities]](
      _.communities,
      {
        case (t, v) => t.copy(communities = v)
      })
  val tweetypieOnlyTakedownCountryCodes: Lens[Tweet, Option[Seq[String]]] =
    tweetLens[Option[Seq[String]]](
      _.tweetypieOnlyTakedownCountryCodes,
      {
        case (t, v) => t.copy(tweetypieOnlyTakedownCountryCodes = v)
      })
  val tweetypieOnlyTakedownReasons: Lens[Tweet, Option[Seq[TakedownReason]]] =
    tweetLens[Option[Seq[TakedownReason]]](
      _.tweetypieOnlyTakedownReasons,
      {
        case (t, v) => t.copy(tweetypieOnlyTakedownReasons = v)
      })
  val profileGeo: Lens[Tweet, Option[ProfileGeoEnrichment]] =
    tweetLens[Option[ProfileGeoEnrichment]](
      _.profileGeoEnrichment,
      (t, v) => t.copy(profileGeoEnrichment = v)
    )
  val visibleTextRange: Lens[Tweet, Option[TextRange]] =
    tweetLens[Option[TextRange]](
      _.visibleTextRange,
      {
        case (t, v) => t.copy(visibleTextRange = v)
      })
  val selfPermalink: Lens[Tweet, Option[ShortenedUrl]] =
    tweetLens[Option[ShortenedUrl]](
      _.selfPermalink,
      {
        case (t, v) => t.copy(selfPermalink = v)
      })
  val extendedTweetMetadata: Lens[Tweet, Option[ExtendedTweetMetadata]] =
    tweetLens[Option[ExtendedTweetMetadata]](
      _.extendedTweetMetadata,
      {
        case (t, v) => t.copy(extendedTweetMetadata = v)
      })
  object TweetCoreData {
    val userId: Lens[TweetCoreData, UserId] = checkEq[TweetCoreData, UserId](
      _.userId,
      { (c, v) =>
        // Pleases the compiler: https://github.com/scala/bug/issues/9171
        val userId = v
        c.copy(userId = userId)
      })
    val text: Lens[TweetCoreData, String] = checkEq[TweetCoreData, String](
      _.text,
      { (c, v) =>
        // Pleases the compiler: https://github.com/scala/bug/issues/9171
        val text = v
        c.copy(text = text)
      })
    val createdAt: Lens[TweetCoreData, TweetId] =
      checkEq[TweetCoreData, Long](_.createdAtSecs, (c, v) => c.copy(createdAtSecs = v))
    val createdVia: Lens[TweetCoreData, String] =
      checkEq[TweetCoreData, String](
        _.createdVia,
        {
          case (c, v) => c.copy(createdVia = v)
        })
    val hasTakedown: Lens[TweetCoreData, Boolean] =
      checkEq[TweetCoreData, Boolean](
        _.hasTakedown,
        {
          case (c, v) => c.copy(hasTakedown = v)
        })
    val nullcast: Lens[TweetCoreData, Boolean] =
      checkEq[TweetCoreData, Boolean](
        _.nullcast,
        {
          case (c, v) => c.copy(nullcast = v)
        })
    val nsfwUser: Lens[TweetCoreData, Boolean] =
      checkEq[TweetCoreData, Boolean](
        _.nsfwUser,
        {
          case (c, v) => c.copy(nsfwUser = v)
        })
    val nsfwAdmin: Lens[TweetCoreData, Boolean] =
      checkEq[TweetCoreData, Boolean](
        _.nsfwAdmin,
        {
          case (c, v) => c.copy(nsfwAdmin = v)
        })
    val reply: Lens[TweetCoreData, Option[Reply]] =
      checkEq[TweetCoreData, Option[Reply]](
        _.reply,
        {
          case (c, v) => c.copy(reply = v)
        })
    val share: Lens[TweetCoreData, Option[Share]] =
      checkEq[TweetCoreData, Option[Share]](
        _.share,
        {
          case (c, v) => c.copy(share = v)
        })
    val narrowcast: Lens[TweetCoreData, Option[Narrowcast]] =
      checkEq[TweetCoreData, Option[Narrowcast]](
        _.narrowcast,
        {
          case (c, v) => c.copy(narrowcast = v)
        })
    val directedAtUser: Lens[TweetCoreData, Option[DirectedAtUser]] =
      checkEq[TweetCoreData, Option[DirectedAtUser]](
        _.directedAtUser,
        {
          case (c, v) => c.copy(directedAtUser = v)
        })
    val conversationId: Lens[TweetCoreData, Option[ConversationId]] =
      checkEq[TweetCoreData, Option[ConversationId]](
        _.conversationId,
        {
          case (c, v) => c.copy(conversationId = v)
        })
    val placeId: Lens[TweetCoreData, Option[String]] =
      checkEq[TweetCoreData, Option[String]](
        _.placeId,
        {
          case (c, v) => c.copy(placeId = v)
        })
    val geoCoordinates: Lens[TweetCoreData, Option[GeoCoordinates]] =
      checkEq[TweetCoreData, Option[GeoCoordinates]](
        _.coordinates,
        (c, v) => c.copy(coordinates = v)
      )
    val trackingId: Lens[TweetCoreData, Option[TweetId]] =
      checkEq[TweetCoreData, Option[Long]](
        _.trackingId,
        {
          case (c, v) => c.copy(trackingId = v)
        })
    val hasMedia: Lens[TweetCoreData, Option[Boolean]] =
      checkEq[TweetCoreData, Option[Boolean]](
        _.hasMedia,
        {
          case (c, v) => c.copy(hasMedia = v)
        })
  }
  val counts: Lens[Tweet, Option[StatusCounts]] =
    tweetLens[Option[StatusCounts]](
      _.counts,
      {
        case (t, v) => t.copy(counts = v)
      })
  object StatusCounts {
    val retweetCount: Lens[StatusCounts, Option[TweetId]] =
      checkEq[StatusCounts, Option[Long]](
        _.retweetCount,
        (c, retweetCount) => c.copy(retweetCount = retweetCount)
      )
    val replyCount: Lens[StatusCounts, Option[TweetId]] =
      checkEq[StatusCounts, Option[Long]](
        _.replyCount,
        (c, replyCount) => c.copy(replyCount = replyCount)
      )
    val favoriteCount: Lens[StatusCounts, Option[TweetId]] =
      checkEq[StatusCounts, Option[Long]](
        _.favoriteCount,
        {
          case (c, v) => c.copy(favoriteCount = v)
        })
    val quoteCount: Lens[StatusCounts, Option[TweetId]] =
      checkEq[StatusCounts, Option[Long]](
        _.quoteCount,
        {
          case (c, v) => c.copy(quoteCount = v)
        })
  }
  val userId: Lens[Tweet, UserId] = requiredCoreData andThen TweetCoreData.userId
  val text: Lens[Tweet, String] = requiredCoreData andThen TweetCoreData.text
  val createdVia: Lens[Tweet, String] = requiredCoreData andThen TweetCoreData.createdVia
  val createdAt: Lens[Tweet, ConversationId] = requiredCoreData andThen TweetCoreData.createdAt
  val reply: Lens[Tweet, Option[Reply]] = requiredCoreData andThen TweetCoreData.reply
  val share: Lens[Tweet, Option[Share]] = requiredCoreData andThen TweetCoreData.share
  val narrowcast: Lens[Tweet, Option[Narrowcast]] =
    requiredCoreData andThen TweetCoreData.narrowcast
  val directedAtUser: Lens[Tweet, Option[DirectedAtUser]] =
    requiredCoreData andThen TweetCoreData.directedAtUser
  val conversationId: Lens[Tweet, Option[ConversationId]] =
    requiredCoreData andThen TweetCoreData.conversationId
  val placeId: Lens[Tweet, Option[String]] = requiredCoreData andThen TweetCoreData.placeId
  val geoCoordinates: Lens[Tweet, Option[GeoCoordinates]] =
    requiredCoreData andThen TweetCoreData.geoCoordinates
  val hasTakedown: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.hasTakedown
  val nsfwAdmin: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nsfwAdmin
  val nsfwUser: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nsfwUser
  val nullcast: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nullcast
  val trackingId: Lens[Tweet, Option[ConversationId]] =
    requiredCoreData andThen TweetCoreData.trackingId
  val hasMedia: Lens[Tweet, Option[Boolean]] = requiredCoreData andThen TweetCoreData.hasMedia
  object CashtagEntity {
    val indices: Lens[CashtagEntity, (Short, Short)] =
      checkEq[CashtagEntity, (Short, Short)](
        t => (t.fromIndex, t.toIndex),
        (t, v) => t.copy(fromIndex = v._1, toIndex = v._2)
      )
    val text: Lens[CashtagEntity, String] =
      checkEq[CashtagEntity, String](_.text, (t, text) => t.copy(text = text))
  }
  object HashtagEntity {
    val indices: Lens[HashtagEntity, (Short, Short)] =
      checkEq[HashtagEntity, (Short, Short)](
        t => (t.fromIndex, t.toIndex),
        (t, v) => t.copy(fromIndex = v._1, toIndex = v._2)
      )
    val text: Lens[HashtagEntity, String] =
      checkEq[HashtagEntity, String](_.text, (t, text) => t.copy(text = text))
  }
  object MediaEntity {
    val indices: Lens[MediaEntity, (Short, Short)] =
      checkEq[MediaEntity, (Short, Short)](
        t => (t.fromIndex, t.toIndex),
        (t, v) => t.copy(fromIndex = v._1, toIndex = v._2)
      )
    val mediaSizes: Lens[MediaEntity, collection.Set[MediaSize]] =
      checkEq[MediaEntity, scala.collection.Set[MediaSize]](
        _.sizes,
        (m, sizes) => m.copy(sizes = sizes)
      )
    val url: Lens[MediaEntity, String] =
      checkEq[MediaEntity, String](
        _.url,
        {
          case (t, v) => t.copy(url = v)
        })
    val mediaInfo: Lens[MediaEntity, Option[MediaInfo]] =
      checkEq[MediaEntity, Option[MediaInfo]](
        _.mediaInfo,
        {
          case (t, v) => t.copy(mediaInfo = v)
        })
  }
  object MentionEntity {
    val indices: Lens[MentionEntity, (Short, Short)] =
      checkEq[MentionEntity, (Short, Short)](
        t => (t.fromIndex, t.toIndex),
        (t, v) => t.copy(fromIndex = v._1, toIndex = v._2)
      )
    val screenName: Lens[MentionEntity, String] =
      checkEq[MentionEntity, String](
        _.screenName,
        (t, screenName) => t.copy(screenName = screenName)
      )
  }
  object UrlEntity {
    val indices: Lens[UrlEntity, (Short, Short)] =
      checkEq[UrlEntity, (Short, Short)](
        t => (t.fromIndex, t.toIndex),
        (t, v) => t.copy(fromIndex = v._1, toIndex = v._2)
      )
    val url: Lens[UrlEntity, String] =
      checkEq[UrlEntity, String](_.url, (t, url) => t.copy(url = url))
  }
  object Contributor {
    val screenName: Lens[Contributor, Option[String]] =
      checkEq[Contributor, Option[String]](
        _.screenName,
        (c, screenName) => c.copy(screenName = screenName)
      )
  }
  object Reply {
    val inReplyToScreenName: Lens[Reply, Option[String]] =
      checkEq[Reply, Option[String]](
        _.inReplyToScreenName,
        (c, inReplyToScreenName) => c.copy(inReplyToScreenName = inReplyToScreenName)
      )
    val inReplyToStatusId: Lens[Reply, Option[TweetId]] =
      checkEq[Reply, Option[TweetId]](
        _.inReplyToStatusId,
        (c, inReplyToStatusId) => c.copy(inReplyToStatusId = inReplyToStatusId)
      )
  }
 }
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala
@ -0,0 +1,18 @@
 package com.twitter.tweetypie.util
 import com.twitter.tweetutil.TweetPermalink
 import com.twitter.tweetypie.thriftscala._
 object TweetPermalinkUtil {
  def lastQuotedTweetPermalink(tweet: Tweet): Option[(UrlEntity, TweetPermalink)] =
    lastQuotedTweetPermalink(TweetLenses.urls.get(tweet))
  def lastQuotedTweetPermalink(urls: Seq[UrlEntity]): Option[(UrlEntity, TweetPermalink)] =
    urls.flatMap(matchQuotedTweetPermalink).lastOption
  def matchQuotedTweetPermalink(entity: UrlEntity): Option[(UrlEntity, TweetPermalink)] =
    for {
      expanded <- entity.expanded
      permalink <- TweetPermalink.parse(expanded)
    } yield (entity, permalink)
 }
--- a/Show More
+++ b/Show More