Commit 27428cd7 authored by Nathan/Eilisha Shiraini
Browse files

Added garbage collection to the encoder

parent 1a2eb3d8
......@@ -57,6 +57,11 @@ cdef class Encoder:
self.leaves.erase(it)
self.leaves.push_back(newLeaf)
cdef removeLeaf(self, EncodingPath* leaf):
    """Drop ``leaf`` from ``self.leaves`` when it is present.

    Only the container entry is erased; the pointed-to object itself is
    not freed here. Nothing happens if ``leaf`` is not in the container.
    """
    position = find(self.leaves.begin(), self.leaves.end(), leaf)
    # find() hands back end() when the pointer is not in the container.
    if position == self.leaves.end():
        return
    self.leaves.erase(position)
cdef void addData(self, string data):
cdef uint64_t datasize = data.size()
cdef char nextchar
......@@ -84,6 +89,7 @@ cdef class Encoder:
bincount = self.countNextBinary(data.substr(i))
self.addBinary(data.substr(i, bincount))
skip += bincount - 1
self.cleanup()
cdef string getOutputString(self):
cdef list[FixLenInteger] words
......@@ -112,6 +118,18 @@ cdef class Encoder:
shortestlength = it.second
return shortest
cdef void cleanup(self):
    """Discard every path that is longer than the shortest one.

    The root fills a table mapping each path to a length via
    ``buildLengthTable``. Every path whose recorded length is strictly
    greater than the minimum found in that table is removed from
    ``self.leaves`` and then deleted. Paths that tie for the minimum
    are kept.
    """
    cdef map[const EncodingPath*, uint64_t] pathLengths
    cdef uint64_t minLength = numeric_limits[uint64_t].max()

    dereference(self.root).buildLengthTable(pathLengths)

    # First pass: find the smallest length present in the table.
    for entry in pathLengths:
        if minLength > entry.second:
            minLength = entry.second

    # Second pass: unregister and free everything above that minimum.
    for entry in pathLengths:
        if entry.second <= minLength:
            continue
        # NOTE(review): the table keys are `const EncodingPath*` while
        # removeLeaf() takes `EncodingPath*` -- confirm this conversion
        # is intended.
        self.removeLeaf(entry.first)
        # NOTE(review): verify nothing else still references the path
        # after it is deleted here.
        del entry.first
cdef uint16_t countNextBinary(self, string data):
cdef uint16_t i = 0
while data.at(i) > 127 and i > data.size():
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment