Skip to content

Commit

Permalink
Merge pull request #147 from realratchet/master
Browse files (browse the repository at this point in the history)
Column selector API update
  • Loading branch information
realratchet authored Mar 14, 2024
2 parents 3bdf7ff + 8db4938 commit 8f34953
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 33 deletions.
24 changes: 23 additions & 1 deletion nimlite/funcs/column_selector/casters.nim
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ type
FromDateTime* = object
ToTime* = object

template uncastable*() =
  ## Raises `ValueError` with the message "uncastable".
  ##
  ## Used in caster branches that have no valid conversion, e.g. the
  ## `K_NONETYPE` branch of the kind dispatch further down in this file.
  ## Exported so other modules can signal the same failure.
  raise newException(ValueError, "uncastable")

proc newMkCaster(caster: NimNode, isPyCaster: bool): NimNode =
expectKind(caster, nnkLambda)
Expand Down Expand Up @@ -210,6 +210,28 @@ template obj2prim(v: PY_ObjectND) =
of K_DATETIME: FromDateTime.fnCast(R)(PY_DateTime(v).value)
of K_NONETYPE: uncastable()

# Maps an object kind `v` to the type used on the *source* side of a cast:
# bool/int/float/string for the primitive kinds, `FromDate` for K_DATE,
# `PY_Time` for K_TIME and `FromDateTime` for K_DATETIME.
# K_NONETYPE has no source type and expands to `uncastable()`.
# NOTE(review): `return` inside a template is substituted at the expansion
# site, and the branches of this `case` are different typedescs; presumably
# `v` has to be a compile-time constant for this to work -- confirm that the
# template compiles at its call sites (a `when` chain may be needed).
# NOTE(review): K_TIME maps to `PY_Time` here while the other temporal kinds
# map to `From*` types -- confirm this asymmetry is intended.
template castFromType*(v: KindObjectND): typedesc =
return case v:
of K_BOOLEAN: bool
of K_INT: int
of K_FLOAT: float
of K_STRING: string
of K_DATE: FromDate
of K_TIME: PY_Time
of K_DATETIME: FromDateTime
of K_NONETYPE: uncastable()

# Maps an object kind `v` to the type used on the *target* side of a cast:
# bool/int/float/string for the primitive kinds, `ToDate` for K_DATE,
# `ToTime` for K_TIME and `ToDateTime` for K_DATETIME.
# Every other kind falls into the `else` branch and expands to `uncastable()`.
# NOTE(review): no return type is declared on this template, and the `case`
# branches are different typedescs; presumably `v` must be known at compile
# time -- confirm that this compiles where it is used.
template castToType*(v: KindObjectND) =
case v:
of K_BOOLEAN: bool
of K_INT: int
of K_FLOAT: float
of K_STRING: string
of K_DATE: ToDate
of K_TIME: ToTime
of K_DATETIME: ToDateTime
else: uncastable()

mkCasters:
proc(v: PY_ObjectND) =
bool = v.obj2prim()
Expand Down
66 changes: 61 additions & 5 deletions nimlite/funcs/column_selector/makepage.nim
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import std/[enumerate, unicode, tables, times]
import std/[enumerate, unicode, tables, times, macros]
import ../../numpy
import ../../pytypes
from mask import Mask
Expand All @@ -25,7 +25,55 @@ proc canBeNone*(page: BaseNDArray): bool =

return canBeNone

template makePage*[T: typed](dt: typedesc[T], page: BaseNDArray, mask: var seq[Mask], reasonLst: var seq[string], conv: proc, allowEmpty: bool, originalName: string, desiredName: string, desiredType: KindObjectND): BaseNDArray =
macro fetchIter(nIter: typeof(nil) | proc, nPage: typed, nBody: untyped) =
  ## Expands to `for (i, v) in enumerate(<iterator>): nBody`.
  ##
  ## * `nIter` is `nil`: the loop iterates over `nPage.pgIter`.
  ## * `nIter` is a symbol: `var myIter = nIter(nPage)` is emitted first and
  ##   the loop iterates over `myIter()`.
  ##
  ## `i`, `v` and `myIter` are created as plain identifiers on purpose, so
  ## that `nBody` can refer to the index `i` and the value `v`.
  ## Any other node kind for `nIter` is reported as a compile-time error.
  let nStmts = newStmtList()
  var nIterator: NimNode

  case nIter.kind
  of nnkNilLit:
    # Use the page expression that was actually passed in. Hard-coding the
    # identifier `page` only works when the caller's variable has that name.
    nIterator = newDotExpr(nPage, ident("pgIter"))
  of nnkSym:
    let nMyIter = ident("myIter")

    # var myIter = nIter(nPage)
    nStmts.add(newVarStmt(nMyIter, newCall(nIter, nPage)))
    nIterator = newCall(nMyIter)
  else:
    # `error` attaches the message to the offending node's source location.
    error("not implemented: " & $nIter.kind, nIter)

  # (i, v)
  let nIterValues = nnkVarTuple.newTree(
    ident("i"),
    ident("v"),
    newEmptyNode()
  )

  # for (i, v) in enumerate(<iterator>): nBody
  nStmts.add(
    nnkForStmt.newTree(
      nIterValues,
      newCall(ident("enumerate"), nIterator),
      nBody
    )
  )

  return nStmts

template makePage*[T: typed](dt: typedesc[T], page: BaseNDArray, mask: var seq[Mask], reasonLst: var seq[string], conv: proc, allowEmpty: bool, originalName: string, desiredName: string, desiredType: KindObjectND, iter: proc | typeof(nil) = nil): T =
template getTypeUserName(t: KindObjectND): string =
case t:
of K_BOOLEAN: "bool"
Expand Down Expand Up @@ -86,7 +134,12 @@ template makePage*[T: typed](dt: typedesc[T], page: BaseNDArray, mask: var seq[M
dtypes[KindObjectND.K_NONETYPE] = dtypes[KindObjectND.K_NONETYPE] + 1
mask[i] = Mask.VALID

for (i, v) in enumerate(page.pgIter):
# when not (iter is typeof(nil)):
# var myIter = iter(page)
# else:
# var myIter: bool = false

fetchIter(iter, page):
# 0. Check if already invalid, if so, skip
if mask[i] == Mask.INVALID:
continue
Expand Down Expand Up @@ -154,7 +207,10 @@ template makePage*[T: typed](dt: typedesc[T], page: BaseNDArray, mask: var seq[M
elif page is DateTimeNDArray:
let strRepr = v.format(fmtDateTime)
else:
let strRepr = $v
when v is PY_ObjectND:
let strRepr = v.toRepr
else:
let strRepr = $v

reasonLst[i] = createCastErrorReason(strRepr, inTypeKind)
mask[i] = Mask.INVALID
Expand All @@ -178,4 +234,4 @@ template makePage*[T: typed](dt: typedesc[T], page: BaseNDArray, mask: var seq[M
else:
let res = T(shape: @[buf.len], buf: buf)

BaseNDArray res
res
2 changes: 1 addition & 1 deletion nimlite/funcs/column_selector/sliceconv.nim
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import infos



proc putPage(page: BaseNDArray, infos: var Table[string, nimpy.PyObject], colName: string, col: ColSliceInfo): void {.inline.} =
proc putPage*(page: BaseNDArray, infos: var Table[string, nimpy.PyObject], colName: string, col: ColSliceInfo): void {.inline.} =
let (dir, pid) = col

infos[colName] = newPyPage(pid, dir, page.len, page.getPageTypes())
Expand Down
44 changes: 25 additions & 19 deletions nimlite/numpy.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,9 @@ proc save*(self: BaseNDArray, path: string): void =
of K_DATETIME: DateTimeNDArray(self).save(path)
of K_OBJECT: ObjectNDArray(self).save(path)

proc save*(self: BaseNDArray, workdir: string, pid: string): void =
  ## Saves the page to `<workdir>/pages/<pid>.npy` by delegating to the
  ## path-based `save` overload.
  let pagePath = workdir & "/pages/" & pid & ".npy"
  self.save(pagePath)

proc save*(self: BaseNDArray, page: nimpy.PyObject): void =
let m = modules()

Expand All @@ -1052,13 +1055,14 @@ proc type2PyType(`type`: KindObjectND): nimpy.PyObject =
of K_DATETIME: return m.datetime.classes.DateTimeClass

proc newPyPage*(id: string, path: string, len: int, dtypes: Table[KindObjectND, int]): nimpy.PyObject =
  ## Creates a Python `SimplePageClass` instance for the page `id` at `path`.
  ##
  ## `dtypes` (object kind -> count) is converted to a Python dict whose keys
  ## are the Python types returned by `type2PyType`.
  # Look the module registry up once and reuse it for both class lookups.
  let m = modules()
  let pyDtypes = m.builtins.classes.DictClass!()

  for (dt, n) in dtypes.pairs:
    let obj = dt.type2PyType()
    pyDtypes[obj] = n

  let pg = m.tablite.modules.base.classes.SimplePageClass!(id, path, len, pyDtypes)

  return pg

Expand All @@ -1069,11 +1073,12 @@ proc newPyPage*(self: BaseNDArray, workdir: string): nimpy.PyObject =
return self.newPyPage(workdir, pid)

proc newPyPage*(self: BaseNDArray): nimpy.PyObject =
  ## Creates a Python page object for `self` in the working directory taken
  ## from the tablite `Config` class.
  ##
  ## The working directory is `Config.workdir / Config.pid`; the page id is
  ## obtained from `SimplePageClass.next_id(workdir)`.
  # Look the module registry up once and reuse it for every lookup below.
  let m = modules()
  let tabliteConfig = m.tablite.modules.config.classes.Config
  let wpid = tabliteConfig.pid.to(string)
  let tablitDir = Path(m.builtins.toStr(tabliteConfig.workdir))
  let workdir = string (tablitDir / Path(wpid))
  let pid = m.tablite.modules.base.classes.SimplePageClass.next_id(workdir).to(string)

  return self.newPyPage(workdir, pid)

Expand Down Expand Up @@ -1268,7 +1273,7 @@ template validatePageKind(page: BaseNDArray, expected: KindObjectND) =
if cnt > 0:
raise newException(ValueError, "invalid page kind, expected only '" & $expected & "' but got: " & $types)

iterator iterateIntPage(page: BaseNDArray): int =
iterator iterateIntPage*(page: BaseNDArray): int =
template collectValues(page: typed) =
for v in page.pgIter:
yield int v
Expand All @@ -1285,7 +1290,7 @@ iterator iterateIntPage(page: BaseNDArray): int =

else: raise newException(ValueError, "invalid page type: " & $page.kind)

iterator iterateFloatPage(page: BaseNDArray): float =
iterator iterateFloatPage*(page: BaseNDArray): float =
template collectValues(page: typed) =
for v in page.pgIter:
yield float v
Expand All @@ -1300,19 +1305,20 @@ iterator iterateFloatPage(page: BaseNDArray): float =

else: raise newException(ValueError, "invalid page type: " & $page.kind)

iterator iterateBooleanPage*(page: BaseNDArray): bool =
  ## Yields every value of `page` as a `bool`.
  ##
  ## * `K_BOOLEAN` pages are iterated directly.
  ## * `K_OBJECT` pages are first checked with `validatePageKind(K_BOOLEAN)`
  ##   and each element is then unwrapped from `PY_Boolean`.
  ##
  ## Raises `ValueError` for any other page kind.
  case page.kind:
  of K_BOOLEAN:
    for v in BooleanNDArray(page).pgIter:
      yield v
  of K_OBJECT:
    page.validatePageKind(K_BOOLEAN)
    for v in ObjectNDArray(page).pgIter:
      yield PY_Boolean(v).value
  else: raise newException(ValueError, "invalid page type: " & $page.kind)


iterator iterateStringPage(page: BaseNDArray): string =
iterator iterateStringPage*(page: BaseNDArray): string =
case page.kind:
of K_STRING:
for v in UnicodeNDArray(page).pgIter:
Expand All @@ -1323,7 +1329,7 @@ iterator iterateStringPage(page: BaseNDArray): string =
yield PY_String(v).value
else: raise newException(ValueError, "invalid page type: " & $page.kind)

iterator iterateObjectPage(page: BaseNDArray): PY_ObjectND =
iterator iterateObjectPage*(page: BaseNDArray): PY_ObjectND =
case page.kind:
of K_BOOLEAN: (for v in BooleanNDArray(page).pgIter: yield newPY_Object(v))
of K_INT8: (for v in Int8NDArray(page).pgIter: yield newPY_Object(v))
Expand Down
12 changes: 8 additions & 4 deletions nimlite/pytypes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@ proc str2ObjKind*(val: string): KindObjectND =
else: raise newException(ValueError, "invalid object kind: " & val)

type Shape* = seq[int]
type
  # Base of the Python-value wrapper hierarchy. The object type is named
  # separately from its `ref` alias; only the `ref` alias is exported.
  PyObj = object of RootObj
  PY_Object* = ref PyObj
type
  # Wrapper base that carries the kind tag of the wrapped value.
  # `requiresInit` is placed on the object type, so instances have to be
  # constructed explicitly.
  PyObjND {.requiresInit.} = object of PyObj
    kind*: KindObjectND
  PY_ObjectND* = ref PyObjND
type PY_Boolean* = ref object of PY_ObjectND
value*: bool
type PY_Int* = ref object of PY_ObjectND
Expand Down Expand Up @@ -65,7 +69,7 @@ method toRepr*(self: PY_NoneType): string = "None"
# String representations of the wrapped Python values.
method toRepr*(self: PY_Boolean): string = $self.value
method toRepr*(self: PY_Int): string = $self.value
method toRepr*(self: PY_Float): string = $self.value
# Strings are returned as-is, without surrounding quotes.
method toRepr*(self: PY_String): string = self.value
# Temporal values are rendered with the module's format strings.
method toRepr*(self: PY_Date): string = self.value.format(fmtDate)
method toRepr*(self: PY_Time): string = self.value.duration2Date.format(fmtTime)
method toRepr*(self: PY_DateTime): string = self.value.format(fmtDateTime)
Expand Down
4 changes: 2 additions & 2 deletions tablite/nimlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,14 @@ def column_select(table: K, cols: list[ColumnSelectorDict], tqdm=_tqdm, TaskMana
tbl_fail = T({k: [] for k in failed_column_data})

converted = []
step_size = 45 / max(page_count - 1, 1)
step_size = 45 / max(page_count, 1)

if is_mp:
class WrapUpdate:
def update(self, n):
pbar.update(n * step_size)

with TaskManager(cpu_count=cpu_count) as tm:
with TaskManager(min(cpu_count, page_count)) as tm:
res = tm.execute(list(tasks), pbar=WrapUpdate())

if any(isinstance(r, str) for r in res):
Expand Down
2 changes: 1 addition & 1 deletion tablite/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Version components; bump `patch` for every release.
major, minor, patch = 2023, 10, 9
# Tuple form, e.g. (2023, 10, 9).
__version_info__ = (major, minor, patch)
# Dotted string form, e.g. "2023.10.9".
__version__ = ".".join(str(i) for i in __version_info__)

0 comments on commit 8f34953

Please sign in to comment.