# -*- Mode: Python -*-
#
# removed from automata.py - minimization doesn't do much for a lexer,
# since you have to keep the final states separate in order to distinguish
# the different kinds of tokens.
#
# Note: the trick used here seems to work, though.
# Instead of seeding the partition with just the final and non-final
# states, divide the final set so that the distinction between the
# sub-expressions (wrapped in TOKEN by lexer.py) is retained.
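#
# e.g. (hypothetical) if states 2 and 5 accept two different token
# sub-expressions, the lexer-aware initial partition is
#   [[2], [5], <non-finals>]
# rather than the standard
#   [[2, 5], <non-finals>],
# so the two kinds of accepting states can never be merged.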
def minimize (self, collapse_finals=True):
    # XXX using minimization for a lexer is probably not worth it.
    n_states = len (self.states)
    W ('self.finals=%r\n' % (self.finals,))
    final = self.finals
    # build <t>: from-state -> {symbol: to-state}
    t = {}
    for fs, ch, ts in self.dfa:
        t2 = t.get (fs, {})
        t2[ch] = ts
        t[fs] = t2
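    # e.g. (hypothetical) self.dfa = [(0, 'a', 1), (1, 'a', 1)]
    # gives t = {0: {'a': 1}, 1: {'a': 1}}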
    W ('minimize - input dfa:\n')
    pp (t)
    W ('minimize - finals: %r\n' % (final,))
    # make sure final states exist in <t>, even with no out-transitions
    for f in final:
        if f not in t:
            t[f] = {}
    # initial partition
    non_final = set (t.keys()).difference (final)
    if not non_final or not final:
        W ('warning: dfa is all-final or all-non-final\n')
    if collapse_finals:
        # standard: partition into final and non-final
        part = [list (final), list (non_final)]
    else:
        # leave each final state distinguished (useful for lexing)
        part = [[x] for x in final] + [list (non_final)]
    W ('part=%r\n' % (part,))
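    # e.g. (hypothetical) with final = [2] and states {0, 1, 2}, the
    # initial partition is [[2], [0, 1]]; each pass of the loop below
    # splits any block whose members disagree on which block their
    # transitions land in.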
    # churn: refine the partition until it reaches a fixed point
    while True:
        # make state->partition-index map
        m = {}
        for i in range (len (part)):
            for s in part[i]:
                m[s] = i
        W ('----------------------------------------\n')
        W ('part=%r\n' % (part,))
        new_part = []
        for sub in part:
            if len (sub) == 1:
                # can't refine a one-element partition
                new_part.append (sub)
            else:
                refined = {}
                # <refined> is a map from:
                #   ((sym0, p0), (sym1, p1), ...) => [<fs0>, <fs1>, ...]
                # where each <p> is the partition index of a to-state
                # under the current partition.  each entry of <refined>
                # defines a new sub-partition of <sub>.
                for fs in sub:
                    key = set()
                    for sym, ts in t[fs].items():
                        key.add ((sym, m[ts]))
                    key = tuple (sorted (key))
                    refined.setdefault (key, []).append (fs)
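                # e.g. (hypothetical) if states 0 and 1 both produce the
                # key (('a', 0), ('b', 1)) they stay together:
                #   refined = {(('a', 0), ('b', 1)): [0, 1]}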
                #W ('sub %r refined to\n' % (sub,))
                #pp (refined)
                # now we can extract the unique sub-sets of <sub>
                # out of <refined>
                r2 = set()
                for group in refined.values():
                    r2.add (tuple (group))
                # extend the new partition scheme with this newly
                # refined set.
                new_part.extend ([list (x) for x in r2])
        #W ('new_part=%r\n' % (new_part,))
        if len (new_part) == len (part):
            # refinement only ever splits blocks, so an unchanged
            # count means we've hit a fixed point: done.
            break
        else:
            part = new_part
    # <part> now holds our final partitioning of DFA states;
    # collapse those that are equivalent.
    #W ('final partition: %r\n' % (part,))
    # sort the partition so that state zero (initial) is in front:
    # each block is sorted, and blocks sort by first element, so the
    # block containing state 0 becomes new state 0.
    for p in part:
        p.sort()
    part.sort()
    # make old_state->new_state map
    m = {}
    for i in range (len (part)):
        for s in part[i]:
            m[s] = i
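    # e.g. (hypothetical) part = [[0, 1], [2]] gives m = {0: 0, 1: 0, 2: 1}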
    # translate the dfa: new_dfa[i] holds the transitions of new state i
    old_dfa = t
    new_dfa = []
    for i in range (len (part)):
        # pick the first of the equiv set; all members behave identically
        if len (part[i]):
            equiv = part[i][0]
            old_trans = old_dfa[equiv]
            new_trans = []
            for sym, ts in old_trans.items():
                new_trans.append ((sym, m[ts]))
            new_dfa.append (new_trans)
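    # e.g. (hypothetical) new_dfa = [[('a', 0), ('b', 1)], []] means
    # new state 0 loops on 'a' and reaches new state 1 on 'b', and
    # new state 1 has no out-transitions.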
    # translate final states
    new_finals = set()
    for f in final:
        new_finals.add (m[f])
    # ugh, that was painful.
    return new_dfa, new_finals
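
# a minimal usage sketch (hypothetical: assumes this method is put back
# on the machine class in automata.py, with <states>, <finals>, and
# <dfa> as a list of (from-state, symbol, to-state) triples, plus the
# W/pp debug helpers from that module):
#
#   m.states = range (3)   # only len (m.states) is consulted
#   m.dfa    = [(0, 'a', 1), (1, 'a', 1), (0, 'b', 2), (1, 'b', 2)]
#   m.finals = [2]
#   new_dfa, new_finals = m.minimize (collapse_finals=True)
#   # states 0 and 1 behave identically on both symbols, so they merge:
#   # the result has two states instead of three, with new_finals = {1}.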