from mercurial import revlog, util, node
import sys, os
try:
    import json
except ImportError:
    import simplejson as json

def dump(file_, progress=True):
    """dump the contents of an index file, suitable for diff-parent calculations

    The revlog named by ``file_`` is opened relative to the current working
    directory. For every revision one row is produced:

        [hex node, fulltext size, parent revs..., parent delta sizes..., tip delta size]

    where each size is the length of the second element returned by
    ``revlog.compress`` (presumably the compressed payload -- confirm against
    the Mercurial version in use). A parent delta size is ``None`` when that
    parent is ``node.nullrev``.

    file_    -- path of the revlog index file, relative to os.getcwd()
    progress -- when true, write a completion counter to stderr every
                10 revisions; when false, stay silent

    Returns the list of rows serialized as a JSON string.
    """
    r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_)
    revs = []
    # float so the percentage below is a true division on Python 2 as well
    total = float(len(r))
    for i in r:
        done = i + 1
        # honour the caller's choice; the counter and the percentage both
        # use the number of revisions reached so the last update can hit 100%
        if progress and done % 10 == 0:
            sys.stderr.write("Completion: %d/%d | %2.2f%%\r" % (done, total, 100*done/total))
        n = r.node(i)
        pp = r.parentrevs(i)
        fulltext = len(revlog.compress(r.revision(n))[1])
        # delta against the revision stored immediately before this one
        tipdelta = len(revlog.compress(r.revdiff(i-1, i))[1])
        ppdelta = []
        for p in pp:
            if p == node.nullrev:
                # no parent on this side, hence no delta to measure
                s = None
            else:
                s = len(revlog.compress(r.revdiff(p, i))[1])
            ppdelta.append(s)
        revs.append([node.hex(n), fulltext] + list(pp) + ppdelta + [tipdelta])
    return json.dumps(revs, indent=0)

def load(s):
    """decode the JSON text ``s`` and return the resulting Python object"""
    decoded = json.loads(s)
    return decoded

def emul(revs, maxreadfactor):
    """emulate storing each revision as a delta against its cheapest base

    revs          -- sequence of 7-item rows:
                     (node, fulltext size, p1, p2, p1 delta size,
                      p2 delta size, tip delta size)
    maxreadfactor -- a delta is only eligible when the span from the start
                     of its chain to its own end stays below
                     maxreadfactor * fulltext size

    Returns the accumulated size: the chosen delta size for revisions that
    have an eligible base, the fulltext size for the others.
    """
    chainstart = {}
    total = 0
    for rev, entry in enumerate(revs):
        _hexnode, fullsize, p1, p2, p1size, p2size, tipsize = entry
        previous = rev - 1

        # assigned in this order on purpose: when two keys coincide the
        # later assignment wins
        deltasize = {}
        deltasize[previous] = tipsize
        deltasize[p1] = p1size
        deltasize[p2] = p2size

        # (delta size, read distance, base) for every real candidate base
        reads = []
        for base in (previous, p1, p2):
            if base != node.nullrev:
                size = deltasize[base]
                reads.append((size, total - chainstart[base] + size, base))

        # drop the candidates that would need too long a read
        affordable = [read for read in reads
                      if read[1] < maxreadfactor * fullsize]

        if affordable:
            # tuples compare by delta size first, so this is the smallest delta
            size, _distance, base = min(affordable)
            chainstart[rev] = chainstart[base]
            total += size
        else:
            # nothing eligible: store a full snapshot and start a new chain
            chainstart[rev] = total
            total += fullsize
    return total


if __name__ == "__main__":
    # Command line entry point:
    #   dump REVLOG  -- write the JSON summary of REVLOG to stdout
    #   emul         -- read such a summary from stdin and print the
    #                   estimated size
    # Anything else prints a usage message and exits with status 2 instead
    # of failing with an IndexError or silently doing nothing.
    args = sys.argv[1:]
    action = None
    if args:
        action = args[0]
    if action == 'dump' and len(args) > 1:
        # second arg must be a revlog, dump the index to stdout
        sys.stdout.write(dump(args[1]))
    elif action == 'emul':
        # read the revs from stdin
        revs = load(sys.stdin.read())
        length = emul(revs, 3)
        # written explicitly (with the newline print would add) so both
        # branches report through sys.stdout.write
        sys.stdout.write('estimated file size: %12d bytes (%6.1f MiB)\n' % (length, length/1024.0/1024.0))
    else:
        sys.stderr.write("usage: %s dump REVLOG\n       %s emul < DUMP\n"
                         % (sys.argv[0], sys.argv[0]))
        sys.exit(2)
