The Hooft Family
  
  
#
# Puzzling adventures Sci.Am. Dec 2003.
# Version 2004/05/10
recordlength = {
 3:  7, #2.3333 =   4 AABCBAC - ABCCACB 0.03
 4: 10, #2.5000 =   2 ABBCADCDBA - ABCDCADBBA 0.07
 5: 12, #2.4000 = 212 AABCDCEBAEDB - ABCDEEDBDACB 0.69 
 6: 15, #2.5000 = 770 AABCDBEFCFADEBA - ABCDEFFEDBACFCA 6.14 
 7: 18, #2.5714 =1630 AABCDBEFGCGEADFBAC - ABCDEFGGFEDBAEACFB 107.32 
 8: 21, #2.6250 = >21 ABACDEFGDHECHGBBFEADC - ABCDEFGHHGFECBDACAFBE ~3600
 9: 24, #2.6667 >   1 ABCDEFGGHIBHEDICFDAEACBG (40h)
10: 26, #2.6000 > 337 ABCDEFGHIJACEJIICBHBDGADFE (3m)
        #  2.7000 Hopeless? Challenge +1; 1e6 steps; av. err = 2.33; 81 * 1 error
11: 29, #2.6364 >   1 ABCDEFAGHEIGCJJHKDKGBIFBAEDCH (15y)
12: 31, #2.5833 >3868 ABCDEFGHIJKLLEKDAJBGJAIFBDCHCKF (1050y)
        #  2.6667 Challenge +1; 1.4e6 steps; av. err = 2.17; 324 * 1 error
13: 34, #2.6154 >   2 ABCDEFGHIJKLMMBHGIDAGJLDCEAEKFCBFI (80ky)
14: 37, #2.6429 >   1 ABCDEFGHIJKLHMKEMLFNGNEADBJIMCCAHDFBK (7My)
15: 39, #2.6000 >   4 ABCDEFBGHIHGAJEKLCMMNOKFODIJCNGDLAIKBHE (700My)
        #  2.6667 Challenge +1; 1e6 steps; av. err = 2.42; 81 * 1 error
16: 42, #2.6250 >   6 ABCDEFGHBIJKLMNKOCGNJMPEPOADDKHJFCLFHIGAEB (8Gy)
17: 45, #2.6470 >   1 ABCDEFGFHIJKLLMNOAMPCJQODNBQCIMEPKNGHEGDBAFLJ
18: 47, #2.6111 >   1 ABCDEFGHIJKLMENMLDOPKBQGIOQRHNDCCRJAFAPLHDIBFEK
        #  2.6667 Must be challenged!
19: 50, #2.6315 >   1 ABCDEFGHIJKILMNOOBPQRSKEQSRGNLHJSDFCACLIGDPEBMAKHF
20: 53, #2.6500 >   1 ABCDEFGHEIJKLAMINOPQRPKSQTODNCLSLHGMBRFOJTDBGKIHCAPEE
21: 55, #2.6190 >   1 ABCDEFGHIJKLKMNLOEHJGPDQRSTUQCASROOBMFBUNITPFELDUGAMKCH
        #  2.6667 Must be challenged!
22: 57, #2.6364 >   1 ABCDEFGHIJKLMNMOPLDQRFNSGTPRHBQERTUVVOUCSJAIKFIDBPQOCHGMLE
23: 61, #2.6522 >   1 ABCDEFGHIJKLMNJOPOQRSTFBSUQVKWPMUTTEHVECNPADIWLRKDQAFJGGBMOHC
24: 63, #2.625  >   1 ABCDEFGHIJKKLBMNOPQREQSTSGPRUDNVJWUHXOCMWIACPFSLVFXIMAEHKGRDTJB
        #  2.6667 Must be challenged!
25: 66, #2.6400 >   1 ABCDEFGHIJKLEMNOPQRRSOGTANUJSPVWVCMDLXWQTWHBYRHJLYKFXIDUCBAPFNMGEO
26: 68, #2.6153 >  10 ABCDEFGHIJKLMNOPQRSTUVWXYZZQNBWVMYMAPTYHJOCRVGIAXDUSEKSDHNCFKQLFJIBZ
        #  2.6538 Challenge +1; 1.6e5 steps; av. err = 3.06 ; 0 * 1 error
        #                       7.0e6 steps; av. err = 2.74 ; 723 * 1 error
38:100, #2.6316 >   1 ABCDEFGHIJKLMNBOPQRSTUVWXWYZaHZbPGEcdefghiARjJehNTkkDlKjfXldieDVYbUaNQfbICRHOQcFZSKFLCgUEIBGMaXPJTMA
        #  2.6579 Must be challenged!
}
#
# Rules:
# - A long string can be made by symbols+reverse(symbols), length=2N
# - Adding one symbol can always be done at the front and the back: Z.....Z,
#   making MAXLEN(N+1) >= MAXLEN(N) +2

do_profile = 0 # Try to profile the program?
do_startseries = 1 # Start series by ABC...ZZ? 
do_shuffle = 1 # Randomize the order of the sequences used to loop over?
               # shuffle = 0 is useful with method 1 (systematic search)
method = 3 # recurse completely=1 or mutate the start=2 or directed mutation=3
challenge = 1 # used by method 3, how much longer than the record should we
              # shoot for? 0, 1 are reasonable values.

# Which symbols are allowed?
import sys
if len(sys.argv) == 2:
    symbols = range(int(sys.argv[1]))
else:
    symbols=range(26)

class SeqPr(list):
    def __str__(self):
        """A string representation. Creates a string representation where each
           of the letters is introduced in order. Prefixed by the length"""
        letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
        m = {}
        s = []
        next = 0
        for x in self:
            if m.has_key(x):
                s.append(m[x])
            else:
                m[x] = letters[next]
                next += 1
                s.append(m[x])
        return "%2d %s"%(len(self), ''.join(s))

class Sequence(SeqPr):
    def __init__(self, s, data):
        list.__init__(self, s)
        self.data = data
        self.rls = range(len(s))

    def add(self, symbol):
        """
        Add a symbol to the end. Return None if it is not possible,
        otherwise return a new instance.
        """
        ndata = {}
        s = list(self)
        d = self.data
        for i in self.rls:
            pair = (s[-i-1], symbol, i)
            if pair in d:
                return None
            ndata[pair] = None
        ndata.update(d)
        s.append(symbol)
        return self.__class__(s, ndata)

    def mutate(self, position, symbol):
        """
        Try to change the symbol at position (0=first) to the given symbol.
        If this fails, returns None, otherwise a new instance.
        """
        ndata = {}
        ndata.update(self.data)
        s=list(self)
        oldsymbol = s[position]
        for i in self.rls:
            if position>i:
                pair = (s[i], oldsymbol, position-i-1)
            elif positioni:
                pair = (s[i], symbol, position-i-1)
            elif positioni:
                pair = (s[i], oldsymbol, position-i-1)
            elif position1:
            for symbol in symbols[-2:1:-1]:
                seq = seq.add(symbol)
    return seq

class Stats:
    def __init__(self):
        self.maxlen = 0
        self.emp()
        self.clearrecent()
        self.f = open('stats%02d.log'%len(symbols),'a')
        self.nmutate = 0
        self.nmutsuc = 0
        self.nrecurs = 0
        self.newlen()
        
    def emp(self):
        self.arr = {}

    def newlen(self):
        self.sumlen = 0
        self.numlen = 0
        
    def new(self, seq):
        self.sumlen += len(seq)
        self.numlen += 1
        if len(seq) >= len(self._recentbest):
            self._recentbest = seq
        if len(seq) < self.maxlen:
            return
        if len(seq) > self.maxlen:
            self.maxlen = len(seq)
            self.emp()
        if seq in self.arr:
            return
        self.arr[seq] = None
        self._best = seq
        print len(self.arr),seq
        self.f.write('%s\n'%seq)
        self.f.flush()

    def clearrecent(self):
        self._recentbest = []

    def recentbest(self):
        return self._recentbest
    
    def best(self):
        return self._best

# Method one: total recursion

def recurse(org):
    global stats
    stats.nrecurs += 1
    if stats.nrecurs % 10000 == 0:
        print "Number of recursions: %d, loc av len = %.2f"%(stats.nrecurs,float(stats.sumlen)/stats.numlen)
        stats.newlen()
    for symbol in order(symbols):
        new = org.add(symbol)
        if new is not None:
            stats.new(new)
            recurse(new)
        if not do_shuffle and not symbol in org:
            # Don't try to add other new symbols.
            # Symbols must be introduced in order.
            break

def recurse_boot():
    global stats
    stats = Stats()
    try:
        seq = startseq()
        if do_profile:
            global proffunc
            def proffunc(seq=seq):
                recurse(seq)
            import profile
            profile.run("proffunc()")
        else:
            recurse(seq)
    except KeyboardInterrupt:
        print "Break!"

    print len(stats.best()), "n=",len(stats.arr)

# Method 2: mutate the begin and recurse the end

def mc_recurse(org, stats):
    stats.nrecurs += 1
    for symbol in order(symbols):
        new = org.add(symbol)
        if new is not None:
            stats.new(new)
            mc_recurse(new, stats)
    return stats.best()

def mc():
    import random
    try:
        seq = startseq()
        # Lengthen until no longer possible
        while 1:
            for symbol in order(symbols):
                new = seq.add(symbol)
                if new is not None:
                    seq = new
                    break
            else:
                break
        stats = Stats()
        stats.new(seq)
        # Now enter endless change loop
        nshrink = 1
        mcrecbases = {}
        while 1:
            if stats.nmutsuc%25 == 0:
                print "Mutation attempts: %d Success: %d. Recursions: %d. Curlen: %d"%(stats.nmutate,stats.nmutsuc,stats.nrecurs,len(seq))
            # Shrink until preset length
            while len(seq) > len(stats.best()) - nshrink:
                seq = seq.shrink()
            # Make a single mutation
            success = False
            for i in range(6):
                position = random.randint(0,len(seq)-1)
                for symbol in order(symbols):
                    if symbol != seq[position]:
                        stats.nmutate += 1
                        new = seq.mutate(position, symbol)
                        if new:
                            stats.nmutsuc += 1
                            success = True
                            seq = new
                            stats.new(seq)
                            break
                if success:
                    break
            else:
                # Mutations are failing too often. Shrink more.
                nshrink += 1
                print "Increased nshrink to", nshrink
                
            # Try all possible extensions, and continue with the best
            # But make sure that we are not doing the same recursion twice.
            s = str(seq)
            if s in mcrecbases:
                print "Skipped equivalent recursion"
            else:
                mcrecbases[s] = None
                stats.clearrecent()
                mc_recurse(seq, stats)
                new = stats.recentbest()
                if new:
                    seq = new
            
    except KeyboardInterrupt:
        print "Break!"

def randsymb():
    return symbols[random.randint(0,len(symbols)-1)]

class ErrSeq(SeqPr):
    def __init__(self,*arg,**kw):
        list.__init__(self,*arg,**kw)
        self.heap = {}
        self.err = 0

    def __setitem__(self, i, v):
        self.scoreRemove(i,self[i])
        list.__setitem__(self,i,v)
        self.scoreAdd(i,self[i])

    def append(self, v):
        list.append(self, v)
        self.scoreAdd(len(self)-1,v)

    def scoreAdd(self, i, value, delta = +1):
        s=list(self)
        for j in range(len(self)):
            if i 0 and self.heap[pair] > 1) or
                    (delta < 0 and self.heap[pair] > 0)):
                    self.err += delta
            elif i>j:
                pair = (s[j], s[i], i-j-1)
                self.heap[pair] = self.heap.get(pair,0) + delta
                if ((delta > 0 and self.heap[pair] > 1) or
                    (delta < 0 and self.heap[pair] > 0)):
                    self.err += delta
        #print "scoreAdd",i,value,delta,self.err

    def scoreRemove(self, i, value):
        self.scoreAdd(i, value, delta = -1)

    def score(self):
        return self.err

    def posscore(self, i):
        s=list(self)
        r = 0
        for j in range(len(self)):
            if i1:
                    r += 1
            elif i>j:
                pair = (s[j], s[i], i-j-1)
                if self.heap[pair]>1:
                    r += 1
        return r
    
def directed_mutations():
    targetlength = recordlength[len(symbols)] + challenge
    s=ErrSeq()
    while len(s) < targetlength:
        s.append(randsymb())
    tempbase = len(symbols)*targetlength
    i = 0
    j = 0
    k = 0
    ssold = {}
    sumerr = 0
    numerr = 0
    while 1:
        if do_profile and i>2000:
            return
        k += 1
        for position in range(len(s)):
            if random.random()>1./targetlength and s.posscore(position)==0:
                continue
            for symbol in order(symbols):
                err = s.score()
                i += 1
                if err == 0:
                    print "Number of steps: %d (%d); round %d"%(i, j, k)
                    print "="*20
                    print s
                    print "="*20
                    f=open('%d_%d.log'%(len(symbols),targetlength),'a')
                    f.write("====\n%s\n====\n"%ss)
                    f.close()
                    return
                elif i%1000 == 0:
                    print "Number of steps: %d (%d); round %d; Error: %d (%.2f)"%(i, j, k, err, float(sumerr)/numerr)
                    if i==1000:
                        sumerr=0
                        numerr=0 # Reset after initial mess
                    print s
                elif err == 1:
                    ss = str(s)
                    if ss not in ssold:
                        print "1>",ss
                        f=open('%d_%d.log'%(len(symbols),targetlength),'a')
                        f.write("1> %s\n"%ss)
                        f.close()
                        ssold[ss]=None
                oldsymb = s[position]
                s[position] = symbol
                nerr = s.score()
                #print "Replace %d by %d"%(position,s[position]),err,nerr
                if nerr > err and tempbase**(err-nerr) < random.random():
                    s[position] = oldsymb
                else:
                    j += 1
                    sumerr += err
                    numerr += 1
                    break

if method == 1:
    recurse_boot()
elif method == 2:
    mc()
elif method == 3:
    if do_profile:
        import profile
        profile.run('directed_mutations()')
    else:
        directed_mutations()