<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
          "http://www.w3.org/TR/html4/strict.dtd">

<html>
<head>
   <title>Minimum Edit Distance</title>

   <!-- Stylesheet -->
   <style type="text/css">
      /* Use a grid-style table with fixed-width font for the contents */
      table.grid
      {
         border-collapse: collapse;
         margin: 20px;
      }

      table.grid td
      {
         border: thin black solid;
         font-family: monospace;
         height: 1.5em;
         width: 1.5em;
         text-align: center;
      }

      /***
      Use float-left to create liquid content - content that will auto-arrange
      based on the width of the browser.
      ***/
      .liquid
      {
         float: left;
      }

      /***
      Move below any existing liquid output before rendering this element.
      ***/
      .below
      {
         clear: both;
      }

      /***
      Miscellaneous styles
      ***/
      p
      {
         margin: 1.5em;
      }
   </style>
</head>

<body>
   <h1>Minimum Edit Distance</h1>

   <!-- Input form -->
   <form action="#" method="post">
      <p class="liquid">
         <label for="src">Source string: </label>
         <input type="text" id="src" size="10">
         <br>
         <label for="dst">Destination string: </label>
         <input type="text" id="dst" size="10">
         <br>
         <input type="button" value="Find edit distance" onclick="runMED()">
      </p>
      <p class="liquid">
         <label for="ins-cost">Insertion cost: </label>
         <input type="text" id="ins-cost" size="4" value="1">
         <br>
         <label for="del-cost">Deletion cost: </label>
         <input type="text" id="del-cost" size="4" value="1">
         <br>
         <label for="sub-cost">Substitution cost: </label>
         <input type="text" id="sub-cost" size="4" value="2">
      </p>
   </form>

   <!-- Output -->
   <div class="below">
      <table id="output-table" class="grid liquid">
      </table>
      <p id="output-notes" class="liquid">
      </p>
   </div>

   <!-- Footer -->
   <p class="below liquid">
      Written by
      <a href="mailto:wojcikm4@msu.edu">Michael Wojcik</a>
      for CSE 842, Natural Language Processing, with Prof. Joyce Chai,
      Michigan State University, 2011.
      For details see the
      <a href="med-source.html">formatted source</a>
      (produced with
      <a href="http://www.andre-simon.de/">Highlight 3.3</a>).
   </p>

   <!-- Script -->
   <script type="text/javascript">

      function runMED()
      {
         var source, destination;
         source = document.getElementById("src").value;
         destination = document.getElementById("dst").value;

         // Create the matrix and its HTML table representation
         var tableArray = makeTable(source, destination);

         // Run the algorithm, populating the cells
         var result = processTable(tableArray);

         // Add explanatory notes
         var notes = document.getElementById("output-notes");
         // Clear old notes
         while (notes.hasChildNodes()) notes.removeChild(notes.firstChild);
         // Add new notes
         notes.appendChild
         (
            document.createTextNode
            (
               "Simple replacement cost: "
             + (source.length + destination.length).toString()
            )
         );
         notes.appendChild
         (
            document.createElement("br")
         );
         notes.appendChild
         (
            document.createTextNode("Minimum Edit Distance: " + result)
         );

         return 0;
      }

      function makeTable(src, dst)
      {
         var rows = src.length + 2,
             cols = dst.length + 2;
         var table = document.getElementById("output-table");
         var tbody = document.createElement("tbody");
         var prevRow;

         // Create array of rows and set its size (to reduce reallocation)
         var tableElements = [];
         tableElements.length = rows;

         // Add table rows
         for (var i = 0; i < rows; i++)
         {
            // Create this row's array of columns
            tableElements[i] = [];
            tableElements[i].length = cols;

            // Create this row's TR element
            var row = document.createElement("tr");

            // Add columns to this row
            for (var j = 0; j < cols; j++)
            {
               var cell = document.createElement("td");
               var contents = document.createTextNode(" ");
               tableElements[i][j] = contents;
               cell.appendChild(contents);
               row.appendChild(cell);
            }

            /***
            Add TR element to table body. These traditionally are rendered
            from bottom to top, but HTML renders table rows from top to
            bottom (assuming LTR text), so we add TRs to the table in
            reverse order.
            ***/

            if (i == 0)
               tbody.appendChild(row);    // first row
            else
               tbody.insertBefore(row, prevRow);
            prevRow = row;
         }

         /***
         Set source and destination characters in the matrix. The first
         (leftmost) column is the source string, written from bottom to
         top. The first (bottom) row is the destination string, written
         from left to right. Note cell 0,0 is left empty, and the cells
         at 0,1 and 1,0 represent the empty string.
         ***/

         for (var i = 2; i < rows; i++)
            tableElements[i][0].data = src.charAt(i-2);
         for (var j = 2; j < cols; j++)
            tableElements[0][j].data = dst.charAt(j-2);

         // Add empty-string symbol for cells before strings
         tableElements[1][0].data = "\u2205"; // Unicode empty-set (&empty;)
         tableElements[0][1].data = "\u2205";
         // style those nodes a bit differently
         styleSerifItalic(tableElements[1][0], tableElements[0][1]);

         // Add the new table body to the output table so it can be rendered
         if (table.hasChildNodes())
            table.replaceChild(tbody, table.firstChild); // replace old table
         else
            table.appendChild(tbody);

         return tableElements;
      }

      function processTable(tbl)
      {
         var rows = tbl.length;
         var cols = tbl[0].length;
         var costs = {};

         /***
         Get the operation costs from the form.
         ***/

         costs.insertion =
            parseInt(document.getElementById("ins-cost").value, 10);
         costs.deletion =
            parseInt(document.getElementById("del-cost").value, 10);
         costs.substitution =
            parseInt(document.getElementById("sub-cost").value, 10);


         /***
         Create a parallel table with numerical values so we don't have to
         continually convert between numbers and strings.

         TODO: Replace this with properties of the existing table. Add a
         method to set both integer value and display string at the same
         time.
         ***/

         var vtbl = [];
         vtbl.length = rows;
         for (var i = 0; i < rows; i++)
         {
            // Create columns
            vtbl[i] = [];
            vtbl[i].length = cols;

            // Copy in character from source
            vtbl[i][0] = tbl[i][0].data;
         }

         // Copy in characters from destination
         for (var j = 0; j < cols; j++)
            vtbl[0][j] = tbl[0][j].data;

         // Initialize rest of table (so we can display before processing)
         for (var i = 1; i < rows; i++)
            for (var j = 1; j < cols; j++)
               vtbl[i][j] = "";

         /**
         Initialize first column and row.
         
         The first column (after the contents of the source string)
         represents the cost of converting part of the source string to
         the empty string: the cell 1,1 is the cost of changing the first
         character to the empty string; cell 2,1 is the cost of changing
         the first two characters; and so on. Since we'd make that change
         by deleting the source characters, the cost is the length of the
         source string at that row.

         The first row represents the cost of creating part of the
         destination string from the empty string, so its cells are the
         number of insertions required to get that much of the destination
         string.
         ***/

         for (var i = 1; i < rows; i++)
            vtbl[i][1] = i-1;
         for (var j = 1; j < cols; j++)
            vtbl[1][j] = j-1;

         // Process remaining cells
         for (var i = 2; i < rows; i++)
            for (var j = 2; j < cols; j++)
               vtbl[i][j] = computeCell(vtbl, i, j, costs);

         // Copy values from numeric table to display table
         for (var i = 1; i < rows; i++)
            for (var j = 1; j < cols; j++)
               tbl[i][j].data = vtbl[i][j].toString();

         // Return the final result, which is in the top-right cell
         return vtbl[rows-1][cols-1];
      }

      function computeCell(tbl, i, j, costs)
      {
         /***
         Compute the value for the given cell in the table. This is the
         minimum of four possibilities:
            - the value in the cell to the left, plus 1; this represents
              inserting the current character in the destination string
            - the value in the cell below, plus 1; this represents deleting
              the current character in the source string
            - the value in the cell diagonally below and left, plus 2; this
              represents replacing the current character in the source with
              the current character in the destination (replacement costs 2
              because it's equivalent to insert-plus-delete; this is the
              Levenshtein model)
            - the value in the cell diagonally below and left; this represents
              keeping the current character in both strings because they're
              the same (obviously, if they're not the same, this doesn't
              apply)
         Like most authors, we'll treat the last two cases as the same action,
         with a variable cost.

         The costs for the three operations can now be set in the form.
         ***/

         /***
         If we want to be able to backtrack, in order to figure out a minimal
         edit sequence of operations (alignment), we'd make the following
         properties of the table cells rather than using temporary variables.
         ***/

         var insertCost, deleteCost, replaceCost;

         insertCost = tbl[i][j-1] + costs.insertion;
         deleteCost = tbl[i-1][j] + costs.deletion;
         replaceCost = tbl[i-1][j-1]
                     + (tbl[i][0] == tbl[0][j]? 0 : costs.substitution);

         return Math.min(insertCost, deleteCost, replaceCost);
      }

      /***
      Set the style of an HTMLNode, or the container of a textnode, to serif
      font, italic style. Used to dress up some nodes of the table.
      ***/
      function styleSerifItalic()
      {
         // Iterate over parameters
         for (var idx = 0; idx < arguments.length; idx++)
         {
            // Find the object with style
            var node = arguments[idx];
            while (! (node instanceof HTMLElement))
            {
               if (! ("parentNode" in node)) return;
               node = node.parentNode;
            }
            // Set style properties
            node.style.fontFamily = "serif";
            node.style.fontStyle = "italic";
         }
      }

   </script>
</body>
</html>