Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DUCK] repeat 增加 重复区间+时间点/区间 的能力 #230

Merged
merged 5 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@

package com.xiaomi.duckling.dimension.time

import java.time.temporal.ChronoUnit

import com.xiaomi.duckling.Types._
import com.xiaomi.duckling.dimension.implicits._
import com.xiaomi.duckling.dimension.time.enums.Grain
import com.xiaomi.duckling.dimension.time.enums.Grain._
import com.xiaomi.duckling.dimension.time.form.{TimeOfDay, Month => _}
import com.xiaomi.duckling.dimension.time.helper.TimeDataHelpers.hour
import com.xiaomi.duckling.dimension.time.predicates.{TimeDatePredicate, TimePredicate}
import com.xiaomi.duckling.dimension.time.Types.InstantValue

object Helpers {

Expand Down Expand Up @@ -66,4 +69,20 @@ object Helpers {
} else td2
}

/**
  * Counts the complete grains that lie between two resolved instants.
  *
  * The counting unit is chosen from the grain of `start` only; the grain of
  * `end` is not consulted. A quarter is measured as three months.
  *
  * @param start first instant; its grain selects the unit used for counting
  * @param end   second instant
  * @return the number of complete units between the two instants, narrowed to `Int`
  */
def countGrains(start: InstantValue, end: InstantValue): Int = {
  val from = start.datetime.toLocalDatetime
  val until = end.datetime.toLocalDatetime
  // Each grain maps to a ChronoUnit plus the number of such units that make up one grain.
  val (unit, unitsPerGrain) = start.grain match {
    case NoGrain | Second => (ChronoUnit.SECONDS, 1)
    case Minute           => (ChronoUnit.MINUTES, 1)
    case Hour             => (ChronoUnit.HOURS, 1)
    case Day              => (ChronoUnit.DAYS, 1)
    case Week             => (ChronoUnit.WEEKS, 1)
    case Month            => (ChronoUnit.MONTHS, 1)
    case Quarter          => (ChronoUnit.MONTHS, 3)
    case Year             => (ChronoUnit.YEARS, 1)
  }
  (unit.between(from, until) / unitsPerGrain).toInt
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -373,8 +373,7 @@ trait Rules extends DimRules {
case TimeOfDay(_, _) => Day
case _ => NoGrain
}
} else if (g == Year && td.timeGrain == Month) Year
else NoGrain
} else g
val coarseDate = cycleNth(g, sign * v, roundGrain)
tt(intersect(coarseDate, td))
} else None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ package com.xiaomi.duckling.dimension.time.date

import scalaz.std.string.parseInt
import java.time.LocalDate
import java.util.regex.Pattern

import scala.collection.mutable
import scala.util.Try

import com.xiaomi.duckling.Types._
Expand All @@ -33,7 +35,7 @@ import com.xiaomi.duckling.dimension.time.duration.{isADecade, Duration, Duratio
import com.xiaomi.duckling.dimension.time.enums.Grain._
import com.xiaomi.duckling.dimension.time.enums.Hint.{NoHint, RecentNominal, YearMonth}
import com.xiaomi.duckling.dimension.time.enums.IntervalType.{Closed, Open}
import com.xiaomi.duckling.dimension.time.enums.{Grain, Hint}
import com.xiaomi.duckling.dimension.time.enums.{Grain, Hint, IntervalType}
import com.xiaomi.duckling.dimension.time.helper.TimeDataHelpers._
import com.xiaomi.duckling.dimension.time.predicates.{EndOfGrainPredicate, SequencePredicate, TimeDatePredicate, _}
import com.xiaomi.duckling.dimension.time.{GrainWrapper, TimeData}
Expand Down Expand Up @@ -398,4 +400,38 @@ trait Rules extends DimRules {
}
}
)

// One weekday mention such as 周三 or 星期天; capture group 2 is the day character.
private val WeekX = Pattern.compile("(周|星期)([一二三四五六日天])")
// Day numbers for two weeks written back to back, so that a consecutive run which
// wraps from 7 to 1 (e.g. "671") is still a substring.
private val WeekDayStr = "12345671234567"

/**
  * Two or more weekday mentions written back to back (e.g. 周一周二周三).
  *
  * Each mention is mapped to its day number (Monday = 1 ... Sunday = 7). When the
  * numbers form a consecutive run, the rule produces a closed interval from the
  * first to the last mentioned weekday with Day grain; otherwise it produces nothing.
  */
val ruleWeekXyz = Rule(
  name = "周一周二",
  pattern = List("((周|星期)[一二三四五六日天]){2,}".regex),
  prod = regexMatch { case text :: _ =>
    val matcher = WeekX.matcher(text)
    // Walk the successive matches lazily: every successful find() is followed
    // by reading group 2 of that same match before the next find() runs.
    val days: List[Int] = Iterator
      .continually(matcher.find())
      .takeWhile(identity)
      .map { _ =>
        matcher.group(2) match {
          case "一" => 1
          case "二" => 2
          case "三" => 3
          case "四" => 4
          case "五" => 5
          case "六" => 6
          case "日" | "天" => 7
        }
      }
      .toList
    if (WeekDayStr.contains(days.mkString(""))) {
      val firstDay = TimeDatePredicate(dayOfWeek = days.head)
      val lastDay = TimeDatePredicate(dayOfWeek = days.last)
      val td = TimeData(
        TimeIntervalsPredicate(IntervalType.Closed, firstDay, lastDay, beforeEndOfInterval = true),
        timeGrain = Day)
      Token(Date, td)
    } else None
  }
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,11 @@ object predicates {
case _ => false
}

// Accepts only Time tokens whose time predicate is a TimeIntervalsPredicate;
// every other token is rejected.
val isInterval: Predicate = {
  case Token(Time, td: TimeData) =>
    td.timePred match {
      case _: TimeIntervalsPredicate => true
      case _ => false
    }
  case _ => false
}

val isIntervalOfDay: Predicate = {
case Token(Time, td: TimeData) =>
td.form match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,20 @@ import com.xiaomi.duckling.dimension.time.grain.TimeGrain
import com.xiaomi.duckling.dimension.time.Types._
import com.xiaomi.duckling.dimension.time.enums.Grain
import com.xiaomi.duckling.dimension.time.form.Form
import com.xiaomi.duckling.dimension.time.Helpers.countGrains

/**
  * The "Repeat" dimension. Its rules come from the mixed-in `Rules` trait, and it
  * declares TimeGrain, Duration and Time as the dimensions it depends on.
  */
case object Repeat extends Dimension with Rules {
  override val name: String = "Repeat"

  override val dimDependents: List[Dimension] = TimeGrain :: Duration :: Time :: Nil
}

case class RepeatData(interval: Option[DurationData] = None,
case class RepeatData(interval: Option[DurationData] = None, // 间隔,如果与其它的配合,表示外层间隔
n: Option[Int] = None,
start: Option[TimeData] = None,
workdayType: Option[WorkdayType] = None)
workdayType: Option[WorkdayType] = None,
repeatGrain: Option[Grain] = None,
repeatNFromInterval: Option[TimeData] = None)
extends Resolvable {

override def resolve(context: Context,
Expand All @@ -48,7 +51,19 @@ case class RepeatData(interval: Option[DurationData] = None,
}
case None => (None, true)
}
if (success) Some(RepeatValue(interval, n, instant, workdayType), false)
val repeatN = repeatNFromInterval match {
case Some(intervalTimeData) =>
intervalTimeData.resolve(context, options) match {
case Some((tv: TimeValue, _)) =>
tv.timeValue match {
case IntervalValue(start, end) => Some(countGrains(start, end))
case _ => None
}
case _ => None
}
case _ => None
}
if (success) Some(RepeatValue(interval, n.orElse(repeatN), instant, repeatGrain = repeatNFromInterval.map(_.timeGrain), workdayType), false)
else None
}
}
Expand All @@ -63,6 +78,7 @@ case class RepeatData(interval: Option[DurationData] = None,
case class RepeatValue(interval: Option[DurationData] = None,
n: Option[Int] = None,
start: Option[(TimeValue, Option[Form])] = None,
repeatGrain: Option[Grain] = None,
workdayType: Option[WorkdayType] = None)
extends ResolvedValue {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@ import com.xiaomi.duckling.Types._
import com.xiaomi.duckling.dimension.DimRules
import com.xiaomi.duckling.dimension.implicits._
import com.xiaomi.duckling.dimension.matcher.GroupMatch
import com.xiaomi.duckling.dimension.matcher.Prods.{regexMatch, singleRegexMatch}
import com.xiaomi.duckling.dimension.time.{form, Time, TimeData}
import com.xiaomi.duckling.dimension.matcher.Prods.regexMatch
import com.xiaomi.duckling.dimension.time.{form, GrainWrapper, Time, TimeData}
import com.xiaomi.duckling.dimension.time.duration.{Duration, DurationData}
import com.xiaomi.duckling.dimension.time.enums.{Grain, Hint}
import com.xiaomi.duckling.dimension.time.predicates.{isAPartOfDay, isATimeOfDay, isHint, isNotLatent, isTimeDatePredicate, IntersectTimePredicate, TimeDatePredicate, TimeIntervalsPredicate}
import com.xiaomi.duckling.dimension.time.helper.TimeDataHelpers.intersect
import com.xiaomi.duckling.dimension.time.predicates._

trait Rules extends DimRules with LazyLogging {
/**
Expand Down Expand Up @@ -90,10 +91,12 @@ trait Rules extends DimRules with LazyLogging {
}
)

// Shared "every <grain>" prefix: 每, an optional 一 / 一个 / 个, a grain word and an
// optional trailing 的. Capture group 2 is the grain word; the rules that use this
// pattern read it as the third element of the GroupMatch list.
private val predicateEveryGrain = "每(一个?|个)?(年度?|月|周|星期|天|小时|分钟)的?".regex

val ruleEveryGrainDatetime = Rule(
name = "<every> <grain> <datetime>",
pattern = List(
"每(一个?|个)?(年度?|月|周|星期|天|小时|分钟)的?".regex,
predicateEveryGrain,
and(isDimension(Time), isNotLatent).predicate),
prod = tokens {
case Token(_, GroupMatch(_ :: _ :: grainToken :: _)) :: Token(_, td: TimeData) :: _
Expand Down Expand Up @@ -155,4 +158,54 @@ trait Rules extends DimRules with LazyLogging {
workdaysTime(rd, td)
}
)

// An interval directly followed by a time, e.g. 周一到周五早上八点
// ("Monday to Friday, eight in the morning").
//
// The rule only fires when the grain of the outer interval compares greater than
// the grain of the inner time. The inner time is intersected with the first bound
// (p1) of the outer interval to obtain the start of the repetition, and the outer
// interval itself is stored in repeatNFromInterval.
//
// The interval predicate is obtained with a type pattern rather than an unchecked
// cast, so a token whose predicate is not a TimeIntervalsPredicate makes the rule
// yield nothing instead of throwing ClassCastException.
val ruleIntervalTime = Rule(
  name = "<interval> <time/interval>",
  pattern = List(isInterval.predicate, isDimension(Time).predicate),
  prod = tokens {
    case Token(Time, outer: TimeData) :: Token(Time, inner: TimeData) :: _
        if outer.timeGrain > inner.timeGrain =>
      outer.timePred match {
        case oInterval: TimeIntervalsPredicate =>
          // start
          val start = intersect(inner, TimeData(oInterval.p1, timeGrain = outer.timeGrain))
          Token(Repeat, RepeatData(start = start, repeatNFromInterval = outer))
        case _ => None
      }
  }
)

// An interval joined to a time by 的, e.g. 周一到周五早上的八点
// ("eight o'clock in the morning of Monday to Friday").
//
// The rule only fires when the grain of the outer interval compares greater than
// the grain of the inner time. Both operands have their hint reset to NoHint before
// being intersected; the result is the start of the repetition, and the outer
// interval itself is stored in repeatNFromInterval.
//
// The interval predicate is obtained with a type pattern rather than an unchecked
// cast, so a token whose predicate is not a TimeIntervalsPredicate makes the rule
// yield nothing instead of throwing ClassCastException.
val ruleIntervalTime1 = Rule(
  name = "<interval> 的 <time/interval>",
  pattern = List(isInterval.predicate, "的".regex, isDimension(Time).predicate),
  prod = tokens {
    case Token(Time, outer: TimeData) :: _ :: Token(Time, inner: TimeData) :: _
        if outer.timeGrain > inner.timeGrain =>
      outer.timePred match {
        case oInterval: TimeIntervalsPredicate =>
          val anchor = TimeData(oInterval.p1, timeGrain = outer.timeGrain).copy(hint = Hint.NoHint)
          val start = intersect(inner.copy(hint = Hint.NoHint), anchor)
          Token(Repeat, RepeatData(start = start, repeatNFromInterval = outer))
        case _ => None
      }
  }
)

// "every <grain>" in front of an existing Repeat token.
//
// Produces a copy of the repeat with an interval of one <grain>, but only when the
// grain word can be resolved, the repeat carries a repeatNFromInterval, and the
// resolved grain is not smaller than the grain of that interval. In every other
// case the rule yields nothing.
val ruleEveryRepeat = Rule(
  name = "每 x <repeat>",
  pattern = List(predicateEveryGrain, isDimension(Repeat).predicate),
  prod = tokens {
    case Token(_, GroupMatch(_ :: _ :: grainToken :: _)) :: Token(_, repeat: RepeatData) :: _ =>
      val everyGrainOpt: Option[Grain] = toGrain(grainToken)
      for {
        everyGrain <- everyGrainOpt
        span <- repeat.repeatNFromInterval
        if everyGrain >= span.timeGrain
      } yield {
        val every = DurationData(1, everyGrain)
        Token(Repeat, repeat.copy(interval = every))
      }
  }
)

// 每 / 每个 in front of an existing Repeat token.
//
// When the repeat carries a repeatNFromInterval whose predicate reports a maxGrain,
// the result is a copy of the repeat with an interval of one such grain; otherwise
// the rule yields nothing.
val ruleEveryRepeat1 = Rule(
  name = "每 <repeat>",
  pattern = List("每个?".regex, isDimension(Repeat).predicate),
  prod = tokens {
    case _ :: Token(_, repeat: RepeatData) :: _ =>
      for {
        span <- repeat.repeatNFromInterval
        grain <- span.timePred.maxGrain
      } yield {
        val every = DurationData(1, grain)
        Token(Repeat, repeat.copy(interval = every))
      }
  }
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ object Examples extends DimExamples {
implicit def _toTuple(tv: TimeValue) = Option(tv, None: Option[Form])

override def pairs: List[(ResolvedValue, List[String])] = List(
(RepeatValue(interval = DurationData(1, Week), start = (datetimeInterval(
new DuckDateTime(LocalDateTime.of(2013, 2, 18, 12, 0, 0)),
new DuckDateTime(LocalDateTime.of(2013, 2, 18, 18, 0, 0)),
Hour), None), repeatGrain = Day, n = 3), List("每个周一到周三下午", "每周一到周三的下午")),
(RepeatValue(start = (datetimeInterval(
new DuckDateTime(LocalDateTime.of(2013, 2, 18, 12, 0, 0)),
new DuckDateTime(LocalDateTime.of(2013, 2, 18, 18, 0, 0)),
Hour), None), repeatGrain = Day, n = 3), List("周一到周三下午", "周一到周三的下午")),
(RepeatValue(DurationData(1, Day, schema = "P1D")), List("每天")),
(RepeatValue(DurationData(1, Week, schema = "P1W")), List("每周")),
(RepeatValue(DurationData(15, Minute, schema = "PT15M")), List("每隔15分钟", "隔15分钟")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package com.xiaomi.duckling.task
import java.nio.charset.StandardCharsets
import java.time.ZonedDateTime

import scala.util.control.Breaks.break

import org.fusesource.jansi.Ansi
import org.jline.reader.{EndOfFileException, LineReader, LineReaderBuilder}
import org.jline.reader.impl.completer._
Expand Down Expand Up @@ -143,7 +145,7 @@ object NaiveBayesConsole extends LazyLogging {
} else (false, options)
}

def round(reader: LineReader, options: Options): Options = {
def round(reader: LineReader, options: Options): (Boolean, Options) = {
reader.getTerminal.flush()

val line =
Expand All @@ -152,7 +154,6 @@ object NaiveBayesConsole extends LazyLogging {
} catch {
case ex: EndOfFileException =>
logger.info("bye!")
System.exit(0)
""
}

Expand All @@ -175,11 +176,11 @@ object NaiveBayesConsole extends LazyLogging {
}
}

_options
(line == "", _options)
}

def run(): Unit = {
var options = Options(targets = Set(), withLatent = false)
var options: Options = Options(targets = Set(), withLatent = false)
options.rankOptions.setRanker(Ranker.NaiveBayes)
options.rankOptions.setWinnerOnly(true)
options.rankOptions.setCombinationRank(false)
Expand All @@ -197,7 +198,9 @@ object NaiveBayesConsole extends LazyLogging {

val reader = buildLineReader()
// Keep running rounds until one reports that the session is over.
//
// round returns a pair: a flag that is true when the line it read was empty, and
// the options to use for the next round. Both halves are consumed here:
//  - the flag ends the loop through an ordinary loop condition, so no match can
//    fail on a `false` flag and no `break()` is issued outside of a `breakable`;
//  - the returned options are carried into the next round.
var finished = false
while (!finished) {
  val (stop, nextOptions) = round(reader, options)
  options = nextOptions
  finished = stop
}
}

Expand Down
2 changes: 1 addition & 1 deletion duckling-fork-chinese/project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=1.8.0
sbt.version=1.10.1
Loading