User:PerfektesChaos/WikidiffLX/coding/Line.cpp
Declarations in Line.h
Basic idea stolen from wikidiff2/Word.h but extended by specific flags:
member | type | meaning |
---|---|---|
trailingEnd | Iterator | point after last char of last trailing invisible line, maybe equal to suffixEnd |
number | size_t | line (paragraph) number in original counting of \n (if not NO_LINE_NUMBERS) |
lineHard | bool | this line is terminated by \n or last line at all |
lineBlack | bool | this change line has no difference compared to the corresponding match except trailing whitespace |
op | int | DiffOp |
match | Line* | pointer to the corresponding line for copy, NULL for del and add, and one of them for change. |
#include "Line.h"
// A small class to accommodate lines with hard and virtual termination
// Basically, the pointers and a marker indicating the termination type.
// Only the bodies are compared on operator==.
// For presentation the length of the suffix (not the invisible content)
// can be retrieved, and suffixes may be compared internally.
// Also the number of trailing lines and each length (not the invisible content)
// can be retrieved, and trailing lines may be compared internally.
//
// This class stores iterators pointing to the text string, this is to avoid
// excessive allocation calls. To avoid invalidation, the source string should
// not be changed or destroyed.
/**
* The body is the character sequence [bs, be)
* The space suffix is the character sequence [be, se), none if se=be
* A \n break is indicated by hard; false for virtual break
* Trailing empty lines after hard break are [se, te), none if te=se
* The suffix is at least a "\n" for hard breaks
* and one space if virtual break detected by ". "
* suffix is empty at end of text.
* The number is the line number in original \n counting
*
* Introduced in WikidiffLX by PerfektesChaos@de.wikipedia 2011
*/
// Constructors.
//
// Two flavours exist in each build configuration:
//   * hard line    (bs, be, se, te [, n]) -- terminated by \n or end of text
//   * virtual line (bs, be, se [, n])     -- no trailing range is supplied
// The line number n is only part of the signature when NO_LINE_NUMBERS
// is not defined.
//
// Every constructor sets lineHard, lineBlack and match, so the flag and
// the counterpart pointer never hold indeterminate values. The diff code
// `op` is not set here; it is assigned later through set_diff().
#ifdef NO_LINE_NUMBERS
Line::Line(Iterator bs, Iterator be, Iterator se, Iterator te)
    : bodyStart(bs), bodyEnd(be), suffixEnd(se), trailingEnd(te)
{
    // \n break or end of text
    lineHard = true;
    lineBlack = false;   // was left uninitialized in this build variant
    match = NULL;
}
Line::Line(Iterator bs, Iterator be, Iterator se)
    : bodyStart(bs), bodyEnd(be), suffixEnd(se)
#else
Line::Line(Iterator bs, Iterator be, Iterator se, Iterator te, size_t n)
    : bodyStart(bs), bodyEnd(be), suffixEnd(se), trailingEnd(te), number(n)
{
    // \n break or end of text
    lineHard = true;
    lineBlack = false;
    match = NULL;
}
Line::Line(Iterator bs, Iterator be, Iterator se, size_t n)
    : bodyStart(bs), bodyEnd(be), suffixEnd(se), number(n)
#endif
{
    // virtual line: shared body for both build variants
    lineHard = false;
    // For virtual lines trailingEnd is set to the end of the body
    // (not to the end of the suffix).
    trailingEnd = be;
    lineBlack = false;
    match = NULL;
}
// Equality considers the visible body [bodyStart, bodyEnd) only;
// suffix and trailing lines are ignored.
bool Line::operator==(const Line &o) const
{
    // Bodies of different length can never be equal; testing this first
    // also ensures std::equal is only run on ranges of the same size.
    if ((bodyEnd - bodyStart) != (o.bodyEnd - o.bodyStart)) {
        return false;
    }
    return std::equal(bodyStart, bodyEnd, o.bodyStart);
}
// Negation of operator==: true iff the two bodies differ.
bool Line::operator!=(const Line &o) const
{
    return !(*this == o);
}
// Ordering by lexicographical comparison of the two bodies.
bool Line::operator<(const Line &o) const
{
    const bool precedes = std::lexicographical_compare(
        bodyStart, bodyEnd,
        o.bodyStart, o.bodyEnd);
    return precedes;
}
//fade out? diffEngine
// Conversion to a string holding body plus suffix, i.e. the range
// [bodyStart, suffixEnd). Trailing lines are not included.
Line::operator String() const
{
    String text(bodyStart, suffixEnd);
    return text;
}
// True iff the stored diff code of this line is DiffOp<Line>::change.
bool Line::is_Change() const
{
    return (DiffOp<Line>::change == op);
} // is_Change()
// True iff the stored diff code of this line is DiffOp<Line>::copy.
bool Line::is_Copy() const
{
    return (DiffOp<Line>::copy == op);
} // is_Copy()
// Termination type of this line.
//   true:  hard break (\n, or end of text)
//   false: virtual break
bool Line::is_HardBreak() const
{
    const bool hard = lineHard;
    return hard;
} // is_HardBreak()
// Returns the lineBlack flag. The flag is set by set_diffCopyChange()
// and whitespaceOnly() when a copy pair is re-classified as a change.
bool Line::equals_body() const
{
    const bool sameBody = lineBlack;
    return sameBody;
} // equals_body()
// True iff the suffix [bodyEnd, suffixEnd) of this line and the suffix
// of the other line o are literally equal.
//   o: the line to compare with; must not be NULL.
bool Line::equals_suffix(const Line * o) const
{
    // No suffix here: equal only if the other line has none either.
    if (suffixEnd == bodyEnd) {
        return (o->suffixEnd == o->bodyEnd);
    }
    // Logical && (not bitwise &) so that std::equal is skipped when the
    // lengths differ; otherwise it would read past the end of o's suffix
    // whenever this suffix is the longer one.
    return (suffixEnd - bodyEnd == o->suffixEnd - o->bodyEnd)
        && std::equal(bodyEnd, suffixEnd, o->bodyEnd);
} // equals_suffix()
// True iff the range between point2break(i) and point2break(i+1) of this
// line is literally equal to the same range of the other line o.
//   o: the line to compare with; must not be NULL.
//   i: index handed to point2break() on both lines.
bool Line::equals_trailing(const Line * o, size_t i) const
{
    Iterator pB = point2break(i);
    Iterator pB2 = o->point2break(i);
    Iterator pE = point2break(i+1);
    Iterator pE2 = o->point2break(i+1);
    // Empty range here: equal only if the other range is empty as well.
    if (pE == pB) {
        return (pE2 == pB2);
    }
    // Logical && (not bitwise &) so that std::equal is skipped when the
    // lengths differ; otherwise it would read past pE2 whenever this
    // range is the longer one.
    return (pE - pB == pE2 - pB2)
        && std::equal(pB, pE, pB2);
} // equals_trailing(Line, size_t)
// Copy of the visible body [bodyStart, bodyEnd) as a string.
Line::String Line::get_body() const
{
    String body(bodyStart, bodyEnd);
    return body;
} // get_body()
// Iterator to the first character of the body.
Line::Iterator Line::get_bodyBegin() const
{
    return this->bodyStart;
} // get_bodyBegin()
// Iterator one past the last character of the body.
Line::Iterator Line::get_bodyEnd() const
{
    return this->bodyEnd;
} // get_bodyEnd()
// Pointer to the matching line as stored by set_diff();
// NULL when set_diff(int) was used.
Line * Line::get_counterPart() const
{
    Line * const partner = match;
    return partner;
} // get_counterPart()
// Diff operation code currently stored for this line.
int Line::get_diffCode() const
{
    const int code = op;
    return code;
} // get_diffCode()
#ifndef NO_LINE_NUMBERS
// Line number as handed to the constructor.
// Only available when line numbers are compiled in.
size_t Line::get_lineNumber() const
{
    const size_t lineNo = number;
    return lineNo;
} // get_lineNumber()
#endif
size_t Line::get_suffixLength() const
{
Iterator p = bodyEnd;
return count4invisible(&p, suffixEnd);
} // get_suffixLength()
size_t Line::get_trailingCount() const
{
// Retrieve number of empty trailing lines
size_t k = 0;
if (trailingEnd > suffixEnd) {
Iterator p = suffixEnd + 1; // '\n' at suffixEnd
k = 0;
while (p < trailingEnd) {
p = point2break(p);
k++;
} // while
} // trailing exists
return k;
} // get_trailingCount()
size_t Line::get_trailingLength(const size_t i) const
{
// Retrieve length of empty trailing line #i
// i counts from 0 to get_trailingCount-1
Iterator p = point2break(i);
return count4invisible(&p, trailingEnd);
} // get_trailingLength()
// Store diff code k for a line without counterpart;
// the match pointer is reset to NULL.
void Line::set_diff(const int k)
{
    match = NULL;
    op = k;
} // set_diff(int)
// Store diff code k together with the counterpart line p.
void Line::set_diff(const int k, Line * p)
{
    match = p;
    op = k;
} // set_diff(int, Line*)
void Line::set_diffCopyChange()
{
if (op = DiffOp<Line>::copy) {
op = DiffOp<Line>::change;
lineBlack = true;
match->op = DiffOp<Line>::change;
match->lineBlack = true;
} else {
op = DiffOp<Line>::change;
}
}
// Detect a copy pair that differs in its invisible tail only.
// If this line and its counterpart are both copies, but the ranges
// [bodyEnd, trailingEnd) of the two lines differ, both lines are
// re-classified as change and flagged with lineBlack.
// Nothing happens if this line is not a copy, has no counterpart,
// or the counterpart is not a copy.
void Line::whitespaceOnly()
{
    if (op == DiffOp<Line>::copy && match != NULL) {
        if (match->op == DiffOp<Line>::copy) {
            // leap: tails differ. Stays true if the lengths differ.
            bool leap = true;
            if (trailingEnd == bodyEnd) {
                // no tail here: differs iff the counterpart has one
                leap = (match->trailingEnd > match->bodyEnd);
            } else if (trailingEnd - bodyEnd ==
                       match->trailingEnd - match->bodyEnd) {
                // Same length: differs iff the content is NOT equal.
                // The negation was missing before, so identical tails
                // were flagged as changed and differing ones ignored,
                // in contrast to the two other cases.
                leap = !std::equal(bodyEnd, trailingEnd, match->bodyEnd);
            }
            if (leap) {
                lineBlack = true;
                op = DiffOp<Line>::change;
                match->op = DiffOp<Line>::change;
                match->lineBlack = true;
            }
        }
    }
} // whitespaceOnly()
// Scan forward from point for the next line feed (0x0A).
// Returns the position just behind that line feed, or trailingEnd
// if no line feed occurs before trailingEnd.
Line::Iterator Line::point2break(const Iterator point) const
{
    for (Iterator cursor = point; cursor < trailingEnd; ++cursor) {
        if (static_cast<unsigned char>(*cursor) == 0x0A) {
            return cursor + 1;   // step over the line feed
        }
    }
    return trailingEnd;
} // point2break(Iterator)
// Starting one past suffixEnd, hop from line feed to line feed via
// point2break(Iterator) and stop after hop number i (counted from 0),
// or earlier when trailingEnd is reached.
// Returns the position reached; this is suffixEnd + 1 if that position
// is not in front of trailingEnd.
Line::Iterator Line::point2break(const size_t i) const
{
    Iterator cursor = suffixEnd + 1;   // '\n' at suffixEnd
    for (size_t hop = 0; cursor < trailingEnd; ++hop) {
        cursor = point2break(cursor);
        if (hop == i) {
            break;   // requested hop done
        }
    }
    return cursor;
} // point2break(size_t)
// Count the invisible content from *p up to the next line feed (0x0A)
// or pEnd, whichever comes first.
//   p:    in/out; start position. On return *p rests on the line feed
//         that stopped the scan, or is not in front of pEnd any more.
//   pEnd: limit of the scan (exclusive).
// Returns the count: 1 for each single byte, 3 for each sequence
// introduced by a lead byte >= 0xE0 (same weights as before).
//
// The loop previously never advanced *p and therefore did not terminate
// for any non-empty range not starting with a line feed.
size_t Line::count4invisible(Iterator *p, const Iterator pEnd) const
{
    size_t n = 0;
    while (*p < pEnd) {
        const unsigned char b = static_cast<unsigned char>(**p);
        if (b == 0x0A) {
            return n;
        }
        // ASCII whitespace <= 0x20: one byte
        size_t step = 1;
        if (b >= 0xE0) {
            // UTF8 whitespace: U+2002...U+200A
            // Characters in range U+0800 to U+FFFF represented by 3 bytes
            // first octet is 1110 = 14 = xE
            // NOTE(review): lead bytes >= 0xF0 (4 byte sequences) are
            // treated like 3 byte sequences, as in the original code.
            step = 3;
        }
        n += step;
        // Consume the bytes just counted, but never run beyond pEnd
        // if the sequence is truncated.
        for (size_t k = 0; k < step && *p < pEnd; k++) {
            ++(*p);
        }
    } // while
    return n;
} // count4invisible()