User:PerfektesChaos/WikidiffLX/coding/Word.cpp

Based on wikidiff2/Word.h (rev:67994 Jun 2010).


/**
 * A small class to accommodate word-level diffs by Tim Starling (Wikidiff2)
 *
 * Extended for WikidiffLX by PerfektesChaos@de.wikipedia 2011
 * GPL.
 */

#include "Word.h"

// Basically, a body and an optional suffix (the latter consisting of whitespace),
// where only the bodies are compared on operator==.
// For presentation the length of the suffix (not the invisible content)
// can be retrieved, and suffixes may be compared internally.
//
// This class stores iterators pointing into the line string in order to avoid
// excessive allocation calls. To avoid invalidating those iterators, the source
// string must not be changed or destroyed while a Word still refers to it.

   /**
     * The body is the character sequence [bs, be)
     * The whitespace suffix is the character sequence [be, se)
     */
Word::Word(Iterator bs, Iterator be, Iterator se)
   : bodyStart(bs),   // first element of the body
     bodyEnd(be),     // one past the body; the whitespace suffix starts here
     suffixEnd(se)    // one past the whitespace suffix
{
   // Only the three iterators are stored; no characters are copied here.
}

bool Word::operator==(const Word &w) const {
   // Two words are equal when their bodies match element for element;
   // the whitespace suffix is not looked at here.
   if ((bodyEnd - bodyStart) != (w.bodyEnd - w.bodyStart)) {
      // Bodies of different length can never match. Returning early also
      // keeps std::equal from walking past the end of w's body.
      return false;
   }
   return std::equal(bodyStart, bodyEnd, w.bodyStart);
}
bool Word::operator!=(const Word &w) const {
   // Exact negation of operator==, so again only the bodies are considered.
   const bool same = (*this == w);
   return !same;
}
bool Word::operator<(const Word &w) const {
   // Ordering by body only: element-wise lexicographical comparison of
   // [bodyStart, bodyEnd) against w's body. The suffix plays no part.
   const bool precedes =
      std::lexicographical_compare(bodyStart, bodyEnd, w.bodyStart, w.bodyEnd);
   return precedes;
}
   // Get the body as a string
Word::operator String() const {
   // Fresh copy of the body elements [bodyStart, bodyEnd); the suffix is excluded.
   String body(bodyStart, bodyEnd);
   return body;
}

   // Get the whole word as a string
Word::String Word::whole() const {
   // Body plus whitespace suffix, i.e. the elements [bodyStart, suffixEnd),
   // returned as a newly built string.
   String text(bodyStart, suffixEnd);
   return text;
}

   // Assign the whole word to a string
void Word::get_whole(String & w) const {
   // Build body + suffix ([bodyStart, suffixEnd)) in a scratch string, then
   // exchange contents with w. Swapping instead of assigning avoids copying
   // the characters a second time; w's previous contents end up in the
   // scratch string and are discarded when it goes out of scope.
   String scratch(bodyStart, suffixEnd);
   w.swap(scratch);
}
void Word::get_body(String & w) const {
   // Build the body ([bodyStart, bodyEnd), suffix excluded) in a scratch
   // string, then exchange contents with w. Swapping instead of assigning
   // avoids copying the characters a second time; w's previous contents end
   // up in the scratch string and are discarded when it goes out of scope.
   String scratch(bodyStart, bodyEnd);
   w.swap(scratch);
}

bool Word::equals_suffix(const Word * w) const {
   // True iff the whitespace suffixes of this word and of *w are literally
   // equal. w is dereferenced unconditionally, so it must not be null.
   //
   // The length test must short-circuit (&&, not bitwise &): the
   // three-iterator std::equal reads as many elements starting at w->bodyEnd
   // as this word's suffix holds, so running it against a shorter suffix
   // would read past w->suffixEnd.
   return (suffixEnd - bodyEnd == w->suffixEnd - w->bodyEnd)
          && std::equal(bodyEnd, suffixEnd, w->bodyEnd);
}   // equals_suffix()
size_t Word::get_suffixlength() const {
   // Number of string elements in the suffix [bodyEnd, suffixEnd).
   // This counts storage elements, not displayed characters; per the
   // original note ("currently bytes=ASCII only") the two agree only
   // for ASCII content.
   return static_cast<size_t>(suffixEnd - bodyEnd);
}   // get_suffixlength()