// ===========================================================================
// renouftxt.js
// 4 whn u r v shrt on spc
// Invented by Tim Renouf, 2009. This implementation by David Moore, 2009.
// Copyright 2009, David Moore
//
// still to do:
// This should all be turned into an object, it could then be cached instead
// of re-evaluating the words each time.
// Having chosen one abbreviation we can't go back to the original word
// The cursor positioning probably won't work in IE, see 
// http://javascript.gakaa.com/inputtypepassword-selectionend-2-0-selectionstart.aspx
// replaceWords does not preserve the case of the original words
// tokenize doesn't handle contractions like shan't, he's, you'll, can't
// using greedy algorithm for the knapsack problem may give unoptimal results
// toNumber should handle numbers > 10
// should be able to handle multiword acronyms and abbreviations
// ===========================================================================

function renouftxt(field, keyEvent)
{
    var key = getKey(keyEvent);
    if (key==null || key==0 || key==8 || key==9 || key==13 || key==27) {
        // control key
        return true;
    }

    if (!('renouftxtCompression' in field)) {
        field.renouftxtCompression = 0;
    }
    //info(field.renouftxtCompression);
    

    var full = field.maxLength - 3;
    if (full < 10) {
        // we need more room than that
        return true;
    }

    var goal     = Math.max(5, field.value.length/10);

    if (field.value.length > full && field.renouftxtCompression < goal) {
        var textBits = splitAroundCurrentWord(field);
        var goal     = Math.max(5, field.value.length/10);
        var achieved = 0;

        // easy space replacements
        var leftText  = removeExtraSpaces(textBits[0]);
        achieved += (textBits[0].length - leftText.length);
        var rightText = removeExtraSpaces(textBits[2]);
        achieved += (textBits[2].length - rightText.length);
        //info("goal = "+goal);

        if (goal > achieved) {
            // compress words
            var words        = tokenize(leftText + " " + rightText);
            var replacements = greedyCompress(words, goal);

            var newLeftText  = replaceWords(replacements, leftText);
            achieved += (leftText.length - newLeftText.length);
            leftText = newLeftText;

            var newRightText = replaceWords(replacements, rightText);
            achieved += (rightText.length - newRightText.length);
            rightText = newRightText;
        }
        //info("achieved = "+achieved);
        field.renouftxtCompression += achieved;
        
        // update the DOM
        var newText = leftText;
        newText += textBits[1];
        newText += rightText
        
        var newPos = field.selectionStart - (textBits[0].length - leftText.length);
        field.value = newText;
        field.selectionStart = newPos
        field.selectionEnd   = newPos;
    }

    //info(field.selectionStart+", "+full+", "+field.maxLength);

    return true;
}

function removeExtraSpaces(text)
{
    return text.replace(/\s\s+/g, " ");
}

// avoid compressing the current word
function splitAroundCurrentWord(field)
{
    var text  = field.value
    var start = text.lastIndexOf(" ", field.selectionStart-1);
    if (start == -1) start = 0;
    var end   = text.indexOf(" ", field.selectionStart);
    if (end == -1)   end = text.length -1;

    var textBits = new Array();
    textBits[0] = text.slice(0, start);
    textBits[1] = text.slice(start, end + 1);
    textBits[2] = text.slice(end + 1);

    //info(textBits);
    return textBits;
}

function tokenize(text)
{
    var words = text.split(/\W/);
    return words;
}

// WordReplacement class
function WordReplacement(word, compressedWord, readability)
{
    this.word             = word;
    this.numReplacements  = 1;
    this.compressedWord      = compressedWord;

    // 1.0 = most readable, 0 = unreadable
    this.readability      = readability;
}

WordReplacement.prototype.charsCompressed = function()
{
    return (this.word.length - this.compressedWord.length) * 
           this.numReplacements;
}

WordReplacement.prototype.score = function()
{
    return this.charsCompressed() * this.readability;
}

// higher scores first
WordReplacement.compareByScore = function(lhs, rhs)
{
    if (!lhs instanceof WordReplacement || !rhs instanceof WordReplacement) {
        return 0;
    } else {
        return rhs.score() - lhs.score();
    }
}

WordReplacement.compareByWord = function(lhs, rhs)
{
    if (!lhs instanceof WordReplacement || !rhs instanceof WordReplacement) {
        return 0;
    } else {
        return String.localeCompare(lhs.word, rhs.word);
    }
}

// we want to achieve the compression goal while changing the least 
// number of words the least amount.... oh no it's the knapsack problem
function greedyCompress(words, goal)
{
    var replacementsByScore = new Array();
    var replacementsByWord  = new Object();

    for (var i = 0; i < words.length; ++i) {
        var word = words[i];

        if (word in replacementsByWord) {
            replacementsByWord[word].numReplacements++;

        } else {
            replacement = greedyReplaceWord(word);
            if (replacement != null) {
                replacementsByWord[word] = replacement;
            }
        }
    }

    for (var word in replacementsByWord) {
        replacementsByScore.push(replacementsByWord[word]);
    }

    replacementsByScore.sort(WordReplacement.compareByScore);

    var numReplacements = 0;
    for (numReplacements = 0; 
         goal > 0 && numReplacements < replacementsByScore.length; 
         ++numReplacements) {
        var replacement = replacementsByScore[numReplacements];
        goal -= replacement.charsCompressed();
    }

    return replacementsByScore.slice(0, numReplacements);
}

function greedyReplaceWord(word)
{
    var replacement = null;
    var replaceMethods = [ toNumber, abbrev, innerAbbrev, removeVowels ];

    for (var i = 0; i < replaceMethods.length; ++i) {
        var methodReplacement = replaceMethods[i](word);

        if (methodReplacement) {
            if (replacement == null ||
                methodReplacement.score() > replacement.score()) {
                replacement = methodReplacement;
            }
        }
    }

    return replacement;
}

function toNumber(word)
{
    var num;

    switch (word) {
        case "zero": 
            num = "0"; break;
        case "one": 
            num = "1"; break;
        case "two": 
            num = "2"; break;
        case "three": 
            num = "3"; break;
        case "four": 
            num = "4"; break;
        case "five": 
            num = "5"; break;
        case "six": 
            num = "6"; break;
        case "seven": 
            num = "7"; break;
        case "eight": 
            num = "8"; break;
        case "nine": 
            num = "9"; break;
        case "ten": 
            num = "10"; break;
        default:
            return null;
    }

    return new WordReplacement(word, num, 1.0);
}

// NOTE: use $$ for $
function abbrev(word)
{
    var dictionary = { 
        // word,           compressed, readability
        "text" :           [ "txt",    0.9 ],
        "for" :            [ "4",      0.8 ],
        "you" :            [ "u",      0.9 ],
        "are" :            [ "r",      0.9 ],
        "space" :          [ "spc",    0.8 ],
        "implementation" : [ "impl",   0.8 ],
        "clear" :          [ "clr",    0.8 ],
        "doctor" :         [ "dr",     0.9 ],
        "people" :         [ "ppl",    0.8 ],
        "great" :          [ "gr8",    0.8 ],
        "money" :          [ "$$",     0.8 ],
        "apple" :          [ "apl",    0.6 ],
        "shirt" :          [ "shrt",   0.6 ],
        "very" :           [ "v",      0.6 ],
        "banana" :         [ "nana",   0.6 ],
        "number" :         [ "num",    0.9 ],
        "procedure" :      [ "proc",   0.9 ],
        "process" :        [ "prcs",   0.8 ],
        "pieces" :         [ "pcs",    0.9 ],
        "manager" :        [ "mgr",    0.9 ],
        "through" :        [ "tru",    0.9 ],
        "though" :         [ "tho",    0.9 ],
        "tough" :          [ "tuff",   0.7 ],
        "chapter" :        [ "chpt",   0.9 ],
        "and" :            [ "n",      0.9 ],
    }

    if (word in dictionary) {
        return new WordReplacement(word, 
                                   dictionary[word][0], 
                                   dictionary[word][1]);
    } else {
        return null;
    }
}

function innerAbbrev(word)
{
    var abbrev = word;
    var dictionary = { 
        "tion"  :  "tn",
        "er"   :   "r",
        "ing"  :   "g",
        "ough" :   "o",
        "ue"   :   "u",
        "kn"   :   "n",
    };

    for (var part in dictionary) {
        abbrev = abbrev.replace(new RegExp(part, "gi"), dictionary[part]);
    }

    if (abbrev != word) {
        return new WordReplacement(word, abbrev, 0.5);
    } else {
        return null;
    }
}

function removeVowels(word)
{
    var nvwls = word.replace(/[aeiou]/gi, "");
    if (!nvwls || nvwls[0] != word[0]) {
        return null;
    }

    return new WordReplacement(word, nvwls, 0.1);
}

function replaceWords(replacements, text)
{
    for (var i = 0; i < replacements.length; ++i) {
        var replacement = replacements[i];

        text = text.replace(new RegExp(replacement.word, "gi"), 
                            replacement.compressedWord);
    }

    return text;
}

function info(msg)
{
    var status1 = document.getElementById('status1');
    status1.textContent = msg;
}

function getKey(keyEvent)
{
    var keyCode = 0;

    if (keyEvent) {
        return keyEvent.which;
    } else if (window.event) {
        return window.event.keyCode;
    } else {
        return null;
    }
}

function renouftxtClear(field)
{
    field.value = "";
    field.renouftxtCompression = 0;
}

