Logo Search packages:      
Sourcecode: mailody version File versions  Download package

kmime_charfreq.cpp

/*
    kmime_charfreq.cpp

    KMime, the KDE internet mail/usenet news message library.
    Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/

#include "kmime_charfreq.h"

namespace KMime {

// Builds the character statistics for the bytes held in a QByteArray.
// All counters start at zero and lineMin starts at 0xffffffff; an empty
// array is not scanned, so those initial values are what remains.
CharFreq::CharFreq( const QByteArray & buf )
  : NUL( 0 ), CTL( 0 ), CR( 0 ), LF( 0 ), CRLF( 0 ),
    printable( 0 ), eightBit( 0 ), total( 0 ),
    lineMin( 0xffffffff ), lineMax( 0 ),
    mTrailingWS( false ), mLeadingFrom( false )
{
  // Nothing to analyse: keep the freshly initialised counters.
  if ( buf.isEmpty() )
    return;

  count( buf.data(), buf.size() );
}

// Builds the character statistics for a raw buffer of len bytes.
// All counters start at zero and lineMin starts at 0xffffffff; a null
// pointer or a zero length is not scanned, so those values remain.
CharFreq::CharFreq( const char * buf, size_t len )
  : NUL( 0 ), CTL( 0 ), CR( 0 ), LF( 0 ), CRLF( 0 ),
    printable( 0 ), eightBit( 0 ), total( 0 ),
    lineMin( 0xffffffff ), lineMax( 0 ),
    mTrailingWS( false ), mLeadingFrom( false )
{
  // Nothing to analyse: keep the freshly initialised counters.
  if ( !buf || len == 0 )
    return;

  count( buf, len );
}

// Returns true when ch is a horizontal tab or a space, false otherwise.
static inline bool isWS( char ch )
{
  switch ( ch ) {
  case ' ':
  case '\t':
    return true;
  default:
    return false;
  }
}

void CharFreq::count( const char * it, size_t len ) {

  const char * end = it + len;
  uint currentLineLength = 0;
  // initialize the prevChar with LF so that From_ detection works w/o
  // special-casing:
  char prevChar = '\n';
  char prevPrevChar = 0;

  for ( ; it != end ; ++it ) {
    ++currentLineLength;
    switch ( *it ) {
    case '\0': ++NUL; break;
    case '\r': ++CR;  break;
    case '\n': ++LF;
      if ( prevChar == '\r' ) { --currentLineLength; ++CRLF; }
      if ( currentLineLength >= lineMax ) lineMax = currentLineLength-1;
      if ( currentLineLength <= lineMin ) lineMin = currentLineLength-1;
      if ( !mTrailingWS )
      if ( isWS( prevChar ) || ( prevChar == '\r' && isWS( prevPrevChar ) ) )
        mTrailingWS = true;
      currentLineLength = 0;
      break;
    case 'F': // check for lines starting with From_ if not found already:
      if ( !mLeadingFrom )
      if ( prevChar == '\n' && end - it >= 5 && !qstrncmp( "From ", it, 5 ) )
        mLeadingFrom = true;
      ++printable;
      break;
    default:
      {
      uchar c = *it;
      if ( c == '\t' || c >= ' ' && c <= '~' )
        ++printable;
      else if ( c == 127 || c < ' ' )
        ++CTL;
      else
        ++eightBit;
      }
    }
    prevPrevChar = prevChar;
    prevChar = *it;
  }

  // consider the length of the last line
  if ( currentLineLength >= lineMax ) lineMax = currentLineLength;
  if ( currentLineLength <= lineMin ) lineMin = currentLineLength;

  // check whether the last character is tab or space
  if ( isWS( prevChar ) )
    mTrailingWS = true;

  total = len;
}

bool CharFreq::isEightBitData() const {
  return type() == EightBitData;
}

bool CharFreq::isEightBitText() const {
  return type() == EightBitText;
}

bool CharFreq::isSevenBitData() const {
  return type() == SevenBitData;
}

bool CharFreq::isSevenBitText() const {
  return type() == SevenBitText;
}

// Returns the trailing-whitespace flag recorded by count(): true when a
// tab or space was seen directly before an LF or CRLF, or as the last
// byte of the scanned buffer.
bool CharFreq::hasTrailingWhitespace() const {
  return mTrailingWS;
}

// Returns the flag recorded by count(): true when some line of the
// scanned buffer (including the first) begins with "From ".
bool CharFreq::hasLeadingFrom() const {
  return mLeadingFrom;
}

// Classifies the scanned buffer from the collected counters:
//   - Binary        if any NUL byte was seen
//   - *Data         if the longest line exceeds 988 characters, some CR
//                   is not part of a CRLF pair, or more than 20% of all
//                   bytes are control codes
//   - *Text         otherwise
// The EightBit / SevenBit prefix depends on whether any byte outside the
// 7-bit range was counted.
CharFreq::Type CharFreq::type() const {
#if 0
  qDebug( "Total: %d; NUL: %d; CTL: %d;\n"
        "CR: %d; LF: %d; CRLF: %d;\n"
        "lineMin: %d; lineMax: %d;\n"
        "printable: %d; eightBit: %d;\n"
          "trailing whitespace: %s;\n"
          "leading 'From ': %s;\n",
        total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax,
        printable, eightBit,
        mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no" );
#endif
  // a single NUL byte rules out every non-binary classification
  if ( NUL != 0 )
    return Binary;

  // The same three criteria separate "data" from "text" for both the
  // 7bit and the 8bit case.
  // NOTE(review): the limit checked here is 988, while the comment this
  // replaces spoke of 998 chars -- confirm which value is intended.
  const bool dataLike =
    lineMax > 988 || CR != CRLF || controlCodesRatio() > 0.2;

  if ( eightBit != 0 )
    return dataLike ? EightBitData : EightBitText;

  // no NUL and no 8bit chars:
  return dataLike ? SevenBitData : SevenBitText;
}

// Fraction of the scanned bytes that were counted as printable;
// 0 when nothing has been scanned (total is zero).
float CharFreq::printableRatio() const
{
  if ( !total )
    return 0;

  const float printableCount = float( printable );
  const float byteCount = float( total );
  return printableCount / byteCount;
}

// Fraction of the scanned bytes that were counted as control codes;
// 0 when nothing has been scanned (total is zero).
float CharFreq::controlCodesRatio() const
{
  if ( !total )
    return 0;

  const float controlCount = float( CTL );
  const float byteCount = float( total );
  return controlCount / byteCount;
}

} // namespace KMime



Generated by  Doxygen 1.6.0   Back to index