/***************************** LICENSE START ***********************************

 Copyright 2012 ECMWF and INPE. This software is distributed under the terms
 of the Apache License version 2.0. In applying this license, ECMWF does not
 waive the privileges and immunities granted to it by virtue of its status as
 an Intergovernmental Organization or submit itself to any jurisdiction.

 ***************************** LICENSE END *************************************/

// MvGeoPoints.cc,   apr03/vk


#include "MvGeoPoints.h"

#include <cmath>

#include "MvLocation.h"
#include "MvMiscelaneous.h"
#include "mars.h"
#include "inc_stl.h"
#include "Tokenizer.h"

// the precision with which we write geopoints values
#define VALUE_PRECISION (10)




// Equality of two column descriptions: the column names, the column types and
// every column count must agree. hasStnIds_ is not part of the comparison.
bool MvGeoPointColumnInfo::operator==(const MvGeoPointColumnInfo& in)
{
    return colNames_ == in.colNames_ &&
           ncols_ == in.ncols_ &&
           ncoordcols_ == in.ncoordcols_ &&
           nvalcols_ == in.nvalcols_ &&
           nvalcolsforcompute_ == in.nvalcolsforcompute_ &&
           colTypes_ == in.colTypes_;
}


//_____________________________________________________________________

// Default constructor: zeroes the coordinate, date and time members, clears
// the string value and leaves the point in eGeoTraditional format.
MvGeoP1::MvGeoP1() :
    // it looks strange, but we first initialise gfmt_ to something that we don't want -
    // this is so that when we call format(eGeoTraditional), it will pick up the fact
    // that we're changing the format and it will allocate the values_ vector
    gfmt_(eGeoString),
    latitude_(0),
    longitude_(0),
    height_(0),
    date_(0),
    time_(0),
    vi_(0),
    strValue_("")  //-- Doxygen comments!
{
    // format() only resizes values_ when the format actually changes, hence
    // the deliberately "wrong" initial value of gfmt_ above
    format(eGeoTraditional, 1);
}
//_____________________________________________________________________
// Copy constructor.
// Not completely nice to have essentially duplicate code for copying an
// MvGeoP1 (see _copy()), but for the copy constructor it should be more
// efficient to do it like this, avoiding the default construction of each
// member.
MvGeoP1::MvGeoP1(const MvGeoP1& in) :
    gfmt_(in.gfmt_),
    latitude_(in.lat_y()),
    longitude_(in.lon_x()),
    height_(in.height()),
    date_(in.date()),
    time_(in.time()),
    values_(in.values()),
    vi_(in.vi_),
    strValue_(in.strValue())
{
    // intentionally empty: every member is copied in the initialiser list
    //_copy( in );
}
//_____________________________________________________________________
// Member-wise copy of another point into this one (used by operator=).
void MvGeoP1::_copy(const MvGeoP1& in)
{
    // The format is assigned directly instead of going through
    // format(in.gfmt_): format() may resize values_, which would be wasted
    // work because values_ is overwritten below anyway.
    gfmt_ = in.gfmt_;

    // position
    latitude_  = in.lat_y();
    longitude_ = in.lon_x();
    height_    = in.height();

    // date/time stamp
    date_ = in.date();
    time_ = in.time();

    // payload
    values_   = in.values();
    vi_       = in.vi_;
    strValue_ = in.strValue();
}
//_____________________________________________________________________
// Copy assignment: copies every member of 'in' via _copy() and returns
// a reference to this point.
MvGeoP1& MvGeoP1::operator=(const MvGeoP1& in)
{
    _copy(in);
    return *this;
}
//_____________________________________________________________________

// Two points are equal when format, position, date/time, all values and the
// string value are identical. The value index vi_ is not compared.
bool MvGeoP1::operator==(const MvGeoP1& in)
{
    const bool sameFormat   = (gfmt_ == in.gfmt_);
    const bool samePosition = (latitude_ == in.latitude_) &&
                              (longitude_ == in.longitude_) &&
                              (height_ == in.height_);
    const bool sameStamp    = (date_ == in.date_) && (time_ == in.time_);

    if (!sameFormat || !samePosition || !sameStamp)
        return false;

    return values_ == in.values() && strValue_ == in.strValue_;
}

//_____________________________________________________________________
// Ordering used by the sort() function: North to South, then West to East,
// then by ascending height and finally by ascending value. The value has to
// take part in the ordering, otherwise the remove_duplicates() function will
// not work correctly: it compares adjacent points, so co-located points with
// different values must end up in a well-defined order.
bool MvGeoP1::operator<(const MvGeoP1& in) const
{
    if (latitude_ != in.latitude_)
        return in.latitude_ < latitude_;  //-- larger latitude (further North) first

    if (longitude_ != in.longitude_)
        return longitude_ < in.longitude_;  //-- smaller longitude (further West) first

    if (height_ != in.height_)
        return height_ < in.height_;

    const auto mine   = value();
    const auto theirs = in.value();
    return (mine != theirs) ? (mine < theirs) : false;
}


//_____________________________________________________________________
// Switch this point to another geopoints format, resizing the values array
// if the new format needs a different number of values.
//   fmt     - the new format
//   numvals - number of value columns; only used for eGeoNCols
void MvGeoP1::format(eGeoFormat fmt, size_t numvals)
{
    // nothing to do when the format does not change
    if (fmt == gfmt_)
        return;

    gfmt_ = fmt;

    // For backwards compatibility every fixed format gets 2 values, even the
    // 1-valued ones: in theory someone could create a standard geopoints
    // variable, set its value2 array from a macro and query it later.
    // Only NCOLS uses the caller-supplied count.
    const size_t wanted = (fmt == eGeoNCols) ? numvals : 2;

    if (values_.size() != wanted)
        values_.resize(wanted);
}


//_____________________________________________________________________
// Position-only ordering (North to South, West to East, ascending height).
// Needed for GeoSubsampleFunction in bufr.cc, which cannot use operator<
// because that also compares the value, which is irrelevant there.
bool MvGeoP1::latLonHeightBefore(const MvGeoP1& in) const
{
    if (latitude_ != in.latitude_)
        return in.latitude_ < latitude_;  //-- further North sorts first

    if (longitude_ != in.longitude_)
        return longitude_ < in.longitude_;  //-- further West sorts first

    return (height_ != in.height_) ? (height_ < in.height_) : false;
}


//_____________________________________________________________________
bool MvGeoP1::extract(const char* line, eGeoFormat& geoFmt, MvGeoPoints* gpts)
{
    char buf[255];  //-- for numeric/string check

    gfmt_     = geoFmt;
    strValue_ = "";

    istringstream myInput(line);
    if (gfmt_ == eGeoXYV)  //-- XYV is 'X_lon,Y _lat, Val' format
    {
        myInput >> longitude_ >> latitude_ >> buf;

        _stringOrNumber(buf);  // here it would not make sense to update the gobal format
                               // as eGeoString is assumed to have 6 columns

        height_ = 0;
        date_ = time_ = 0;
        value2(0);
    }
    else if (gfmt_ != eGeoNCols) {
        double d_date;  //-- in case user has floating point valued dates
        double d_time;

        if (hasVector())  //-- polar or XY vector?
        {
            myInput >> latitude_ >> longitude_ >> height_ >> d_date >> d_time >> values_[0] >> values_[1];
        }
        else  //-- TRADITIONAL: Lat/Lon/lev/dat/tim/Val
        {
            myInput >> latitude_ >> longitude_ >> height_ >> d_date >> d_time >> buf;

            _stringOrNumber(buf);

            if (gfmt_ == eGeoString)  // update the 'global' geo format?
                geoFmt = eGeoString;

            value2(0);
        }

        date_ = (long)d_date;
        time_ = (long)d_time;
    }
    else  // ncols
    {
        double d_date;  //-- in case user has floating point valued dates
        double d_time;

        // read the co-ordinate values
        for (int c = 0; c < gpts->nCoordCols(); c++) {
            eGeoColType t = gpts->colType(c);
            if (t == eGeoColStnId)
                myInput >> strValue_;
            else if (t == eGeoColLat)
                myInput >> latitude_;
            else if (t == eGeoColLon)
                myInput >> longitude_;
            else if (t == eGeoColLevel)
                myInput >> height_;
            else if (t == eGeoColDate) {
                myInput >> d_date;
                date_ = (long)d_date;
            }
            else if (t == eGeoColTime) {
                myInput >> d_time;
                time_ = (long)d_time;
            }
        }


        // possible re-allocation of values vector
        // - note that this should only need to be done for the first gpt1 read from the file
        if (gpts->nValCols() != (int)values_.size())
            values_.resize(gpts->nValCols());


        // read the 'value' values
        int vi = -1;
        while (myInput >> values_[++vi]) {
        }

        // first time round, we store the number of columns; subsequently, we check that all rows have the same number of columns
        if (gpts->nValCols() == -1) {
            gpts->nValCols(vi);
            gpts->nValColsForCompute(gpts->nValCols());
        }
        else {
            if (gpts->nValCols() != vi) {
                marslog(LOG_EROR, "Geopoints file has different numbers of columns: the first offending row is:");
                marslog(LOG_EROR, "%s", line);
                return false;
            }
        }
    }

    return true;
}

//_____________________________________________________________________
// Return the content of one column of this point as a string.
//   col     - zero-based column index, coordinate columns first
//   colinfo - column layout (coordinate/value counts and column types)
//   type    - set to eGeoVString for the station id column, eGeoVLong for
//             date and time, eGeoVDouble otherwise
// Returns "BAD COLUMN INDEX" if col is outside the coordinate+value columns.
string
MvGeoP1::column(size_t col, MvGeoPointColumnInfo& colinfo, int& type)
{
    const size_t totalcols = colinfo.ncoordcols_ + colinfo.nvalcols_;
    type                   = eGeoVDouble;

    // col is unsigned, so only the upper bound needs checking
    if (col >= totalcols)
        return "BAD COLUMN INDEX";

    // colTypes_ may hold fewer entries than there are columns; a column
    // without a recorded type is treated as a value column rather than
    // indexing past the end of the vector
    const eGeoColType et = (col < colinfo.colTypes_.size()) ? colinfo.colTypes_[col] : eGeoColValue;

    // is the column index one of the value columns?
    if (!MvGeoPoints::colTypeIsCoord(et)) {
        col -= colinfo.ncoordcols_;  // index into value columns now
        if (col < (size_t)colinfo.nvalcols_) {
            return metview::toString(ivalue(col));
        }
    }
    else {
        // no, it's a coordinate column

        switch (et) {
            case eGeoColStnId:
                type = eGeoVString;
                return strValue();
            case eGeoColLat:
                return metview::toString(lat_y());
            case eGeoColLon:
                return metview::toString(lon_x());
            case eGeoColLevel:
                return metview::toString(height());
            case eGeoColDate: {
                type = eGeoVLong;
                return metview::toString(date());
            }
            case eGeoColTime: {
                type = eGeoVLong;
                return metview::toString(time());
            }
            default:
                return "UNKNOWN";
        }
    }

    // reached when a value-typed column maps outside the value columns
    return string("FORMAT NOT DEFINED");
}

//_____________________________________________________________________
// Decide whether a token is a number or a string and store it accordingly.
// Accepted numeric shape: optional sign, digits with an optional decimal
// point (at least one digit overall), optional exponent with optional sign
// and at least one digit, and no printable character after that.
//  - numeric: the converted number is stored with value()
//  - otherwise: the text is stored in strValue_, the value is set to 0 and
//    the point's format becomes eGeoString
// A null buf is treated as an empty token.
void MvGeoP1::_stringOrNumber(char* buf)
{
    char emptyToken[1] = {'\0'};
    if (!buf)
        buf = emptyToken;

    bool isNumeric = true;
    int dcnt       = 0;
    char* p        = buf;

    if (*p == '-' || *p == '+')  //-- sign is OK
        ++p;

    // the <cctype> classifiers require a value representable as unsigned
    // char; passing a negative plain char (e.g. a non-ASCII byte) is undefined
    if (*p && isalpha(static_cast<unsigned char>(*p))) {
        isNumeric = false;  //-- cannot be a number
    }
    else {
        dcnt = _countDigits(p);      //-- (leading) digits?
        if (dcnt == 0 && *p != '.')  //-- 0 digits => only decimal point is OK
            isNumeric = false;

        if (isNumeric && *p == '.') {
            ++p;
            dcnt += _countDigits(p);  //-- trailing digits?
            if (dcnt == 0)
                isNumeric = false;  //-- decimal point without digits
        }

        if (isNumeric && (*p == 'e' || *p == 'E')) {
            ++p;
            if (*p == '-' || *p == '+')  //-- exponent sign is OK
                ++p;
            if (_countDigits(p) == 0)
                isNumeric = false;  //-- digits must follow
        }

        if (isNumeric && *p && isgraph(static_cast<unsigned char>(*p)))
            isNumeric = false;  //-- must not follow by a printable char
    }

    if (isNumeric) {
        value(atof(buf));  //-- is numeric: convert!
    }
    else {
        strValue_ = buf;  //-- is string: copy!
        value(0);
        gfmt_ = eGeoString;
    }
}

//_____________________________________________________________________
// Count the run of decimal digits starting at p and advance p past them.
//   p - in/out: start of the text to scan; on return points at the first
//       non-digit character (or the terminating NUL). May be null.
// Returns the number of digits skipped.
int MvGeoP1::_countDigits(char*& p)
{
    int dcnt = 0;

    // isdigit() needs an unsigned-char value: a negative plain char is
    // undefined behaviour. The terminating NUL is not a digit, so it ends
    // the loop as well.
    while (p && isdigit(static_cast<unsigned char>(*p))) {
        ++dcnt;
        ++p;
    }

    return dcnt;
}

//_____________________________________________________________________
// True when both points have a valid (non-missing) lat/lon and share the
// same latitude, longitude and height.
bool MvGeoP1::sameLocation(const MvGeoP1& in)
{
    const bool samePosition = latitude_ == in.lat_y() &&
                              longitude_ == in.lon_x() &&
                              height_ == in.height_;
    if (!samePosition)
        return false;

    // a missing location never matches anything
    return !latlon_missing() && !in.latlon_missing();
}

//_____________________________________________________________________
// Set the position of this point.
//   lat - latitude; values outside [-90, 90] are clamped (with a log message)
//   lon - longitude; values below -180 are wrapped into [-180, 180) and
//         values above 360 are wrapped into (0, 360]
// GEOPOINTS_MISSING_VALUE is stored unchanged for either coordinate, and a
// non-finite longitude (NaN, +/-infinity) is stored unchanged because it
// cannot be wrapped.
void MvGeoP1::location(double lat, double lon)
{
    latitude_ = lat;
    if (latitude_ != GEOPOINTS_MISSING_VALUE) {
        if (latitude_ > 90) {
            marslog(LOG_INFO, "Geopoint latitude value %g forced to be 90", latitude_);
            latitude_ = 90;
        }
        else if (latitude_ < -90) {
            marslog(LOG_INFO, "Geopoint latitude value %g forced to be -90", latitude_);
            latitude_ = -90;
        }
    }

    longitude_ = lon;
    if (longitude_ != GEOPOINTS_MISSING_VALUE && std::isfinite(longitude_)) {
        // fmod is used instead of repeatedly adding/subtracting 360: such a
        // loop never terminates once the magnitude is so large that +/-360
        // no longer changes the value
        if (longitude_ < -180) {
            double wrapped = std::fmod(longitude_, 360.0);  // in (-360, 0]
            if (wrapped < -180)
                wrapped += 360;
            if (wrapped == 0)
                wrapped = 0;  // turn a negative zero into a plain zero
            longitude_ = wrapped;
        }
        else if (longitude_ > 360) {
            double wrapped = std::fmod(longitude_, 360.0);  // in [0, 360)
            if (wrapped == 0)
                wrapped = 360;  // exact multiples of 360 end up at 360, as before
            longitude_ = wrapped;
        }
    }
}


// -- MvGeop1Writer
// -- Tiny utility class that pairs one geopoint with the column information
// -- needed to write it to file. The column info is stored in the MvGeoPoints
// -- class, and we do not want to keep a pointer or reference to that class
// -- inside every GP1 object. Since operator<< cannot take extra arguments,
// -- bundling both references in a small helper object is an ok solution.

class MvGeop1Writer
{
    //! Friend function to write one point (one line) into a MvGeoPoints file
    friend ostream& operator<<(ostream& aStream, const MvGeop1Writer& gpw);

public:
    //! Binds the point to write and the column layout to write it with
    MvGeop1Writer(MvGeoP1& gpIn, MvGeoPointColumnInfo& colInfoIn) :
        gp_(gpIn),
        colInfo_(colInfoIn)
    {
    }

    MvGeoP1& gp_;                    //!< the point to be written
    MvGeoPointColumnInfo& colInfo_;  //!< column layout used when writing
};


//_____________________________________________________________________
// Write one geopoint as a single tab-separated line (without a newline):
// first the coordinate columns that belong to the point's format, then its
// value(s). The stream precision is changed while writing and restored
// before returning.
ostream& operator<<(ostream& aStream, const MvGeop1Writer& gpw)
{
    MvGeoP1& gp                   = gpw.gp_;
    MvGeoPointColumnInfo& colInfo = gpw.colInfo_;
    const char cSeparator[]       = "\t";


    // I (IR) don't know why this code was here, but originally, a space character
    // was being written to the start of each value line. I have now (July 2018)
    // removed this, for time and file size efficiency.

    //const char cStartOfLine[] = " ";
    //aStream << cStartOfLine;


    // General note about precision settings: we must be careful if a
    // user-callable function to set the precision is created. This is
    // because we need to ensure that missing values are still correctly
    // written and read. See the value defined for missing values in
    // MvGeoPoints.h to see how many decimal places are required for
    // faithful reading and writing of missing values.
    // See also MvGeoPoints::write, as this also uses the precision value.


    const std::streamsize myOldPrec = aStream.precision();  //-- store current floating point precision
    aStream.precision(7);                                   //-- default of 6 digits may not be enough

    if (gp.format() == eGeoXYV) {
        aStream << gp.lon_x() << cSeparator
                << gp.lat_y() << cSeparator;
    }
    else if (gp.format() == eGeoNCols) {
        // coordinate columns are written in the order given by the column types
        for (const eGeoColType colType : colInfo.colTypes_) {
            switch (colType) {
                case eGeoColLat:
                    aStream << gp.lat_y() << cSeparator;
                    break;
                case eGeoColLon:
                    aStream << gp.lon_x() << cSeparator;
                    break;
                case eGeoColLevel:
                    aStream << gp.height() << cSeparator;
                    break;
                case eGeoColDate:
                    aStream << gp.date() << cSeparator;
                    break;
                case eGeoColTime:
                    aStream << gp.time() << cSeparator;
                    break;
                case eGeoColStnId:
                    // the station id column is only written if ids are in use
                    if (colInfo.hasStnIds_)
                        aStream << gp.strValueForWritingToFile() << cSeparator;
                    break;
                default:
                    break;
            }
        }
    }
    else {
        aStream << gp.lat_y() << cSeparator
                << gp.lon_x() << cSeparator
                << gp.height() << cSeparator
                << gp.date() << cSeparator
                << gp.time() << cSeparator;
    }

    aStream.precision(VALUE_PRECISION);  //-- value may need even more precision

    switch (gp.format()) {
        case eGeoTraditional:
        case eGeoXYV:
            aStream << gp.value();
            break;

        case eGeoString:
            aStream << gp.strValue().c_str();
            break;

        case eGeoVectorPolar:
        case eGeoVectorXY:
            aStream << gp.speed() << cSeparator
                    << gp.direc();
            break;

        case eGeoNCols:
            // separator goes between values only; with no value columns
            // nothing is written (and no value is accessed)
            for (int i = 0; i < colInfo.nvalcols_; i++) {
                if (i > 0)
                    aStream << cSeparator;
                aStream << gp.ivalue(i);
            }
            break;

        default:
            break;
    }

    aStream.precision(myOldPrec);  //-- revert back to original precision

    return aStream;
}


//_____________________________________________________________________
//_____________________________________________________________________
//_________________________MvGeoPoints_________________________________
//_____________________________________________________________________
//_____________________________________________________________________


//_____________________________________________________________________
// return the coordinate column map - if it's the first time, populate it
// - this is a static function, and a static member variable

std::map<std::string, eGeoColType> MvGeoPoints::coordColMap_;

const std::map<std::string, eGeoColType>& MvGeoPoints::coordColMap()
{
    if (coordColMap_.empty()) {
        coordColMap_["latitude"]  = eGeoColLat;
        coordColMap_["longitude"] = eGeoColLon;
        coordColMap_["level"]     = eGeoColLevel;
        coordColMap_["date"]      = eGeoColDate;
        coordColMap_["time"]      = eGeoColTime;
        coordColMap_["stnid"]     = eGeoColStnId;
        coordColMap_["value"]     = eGeoColValue;
        coordColMap_["value2"]    = eGeoColValue2;
    }
    return coordColMap_;
}

// Map a column name to its column type.
//   name          - column name as it appears in the file
//   failIfUnknown - what to return for a name that is not in coordColMap():
//                   eGeoColError if true, eGeoColValue if false
eGeoColType MvGeoPoints::colTypeFromName(const std::string& name, bool failIfUnknown)
{
    const std::map<std::string, eGeoColType>& known = coordColMap();

    const auto hit = known.find(name);
    if (hit != known.end())
        return hit->second;

    return failIfUnknown ? eGeoColError : eGeoColValue;
}


// A column type counts as a coordinate unless it is one of the two value
// types or the error marker.
bool MvGeoPoints::colTypeIsCoord(eGeoColType t)
{
    switch (t) {
        case eGeoColValue:
        case eGeoColValue2:
        case eGeoColError:
            return false;
        default:
            return true;
    }
}


//_____________________________________________________________________

// Construct a geopoints object for 'count' points of format 'efmt'.
//   count   - number of points
//   numvals - number of value columns, passed on to format()
//   efmt    - geopoints format
//   init    - if true the points are created (and given the format) now;
//             if false memory is only reserved for them
MvGeoPoints::MvGeoPoints(int count, int numvals, eGeoFormat efmt, bool init) :
    gfmt_(efmt),
    count_(count),
    path_("/file/name/not/given"),
    dbSystem_(""),
    dbPath_("")
{
    this->setFormat();
    metadata_.clear();
    newReservedSize(count, init);
    // the per-point format can only be set on points that actually exist
    if (init)
        format(efmt, numvals);
}


//_____________________________________________________________________

// Same as the (count, numvals, efmt, init) constructor, taking the number of
// value columns from 'colInfo' and then storing a copy of the complete
// column information.
MvGeoPoints::MvGeoPoints(int count, const MvGeoPointColumnInfo &colInfo, eGeoFormat efmt, bool init) :
    MvGeoPoints(count, colInfo.nvalcols_, efmt, init) // C++11
{
    // do the same as the above constructor, but also copy across all the column information
    colInfo_ = colInfo;
}


//_____________________________________________________________________

// Construct a geopoints object in eGeoTraditional format for 'count' points.
//   count - number of points
//   init  - if true the points are default-constructed now; if false memory
//           is only reserved for them
MvGeoPoints::MvGeoPoints(long count, bool init) :
    gfmt_(eGeoTraditional),
    count_(count),
    path_("/file/name/not/given"),
    dbSystem_(""),
    dbPath_("")
{
    // The format information and the metadata are set up on both paths, as
    // the (count, numvals, efmt, init) constructor does; previously this was
    // skipped when init was true.
    this->setFormat();
    metadata_.clear();

    if (init)
        pts_.resize(count_);  // calls the constructor of each object
    else
        pts_.reserve(count_);  // just reserve memory, no constructors
}

//_____________________________________________________________________

// Copy constructor: all copying is delegated to _copy().
MvGeoPoints::MvGeoPoints(const MvGeoPoints& gp)
{
    _copy(gp);
}

//_____________________________________________________________________

// Construct a geopoints object from a file and load it immediately.
//   name - path of the geopoints file; a null pointer is treated as an
//          empty path
//   nmax - passed on to load()
// A failed load is only reported through load()'s log output; the object is
// then left empty.
MvGeoPoints::MvGeoPoints(const char* name, const int nmax) :
    gfmt_(eGeoTraditional),  // defined even when the file cannot be opened
    count_(0)
{
    path_ = name ? name : "";
    load(nmax);
}

//_____________________________________________________________________

// Destructor: nothing to do explicitly, the members clean up after themselves.
MvGeoPoints::~MvGeoPoints()
{
}
//_____________________________________________________________________
void MvGeoPoints::_copy(const MvGeoPoints& gp)
{
    unload();

    gfmt_          = gp.format();
    count_         = gp.count();
    sgfmt_         = gp.format();
    colInfo_       = gp.colInfo_;
    dbSystem_      = gp.dbSystem();
    dbColumn_      = gp.dbColumn();
    dbColumnAlias_ = gp.dbColumnAlias();
    dbPath_        = gp.dbPath();
    dbQuery_       = gp.dbQuery();
    metadata_      = gp.metadataConst();

    if (count_ > 0) {
        pts_.reserve(count_);  // reserve and push to avoid constructor calls
        for (int p = 0; p < count_; ++p) {
            const MvGeoP1& thisp(gp.const_element_ref(p));
            pts_.push_back(thisp);
        }
    }
    else {
        unload();
    }
}

//_____________________________________________________________________
// Copy assignment. Assigning an object to itself is a no-op; otherwise the
// current content is dropped and replaced with a copy of 'gp'.
MvGeoPoints&
MvGeoPoints::operator=(const MvGeoPoints& gp)
{
    if (this != &gp) {
        unload();
        _copy(gp);
    }

    return *this;
}

//_____________________________________________________________________
// Drop all current points and prepare storage for 'size' new ones.
//   size - new point count (stored in count_ in either case)
//   init - true: create 'size' default-constructed points;
//          false: only reserve memory for them
void MvGeoPoints::newReservedSize(long size, bool init)
{
    unload();
    count_ = size;

    if (!init) {
        pts_.reserve(count_);  // just allocates memory
        return;
    }

    pts_.resize(count_);  // calls constructors
}
//_____________________________________________________________________
// Set the format of this object and of every point it currently counts.
//   fmt     - the new format
//   numvals - number of value columns; recorded via nValCols() for eGeoNCols
//             and handed to each point's format()
void MvGeoPoints::format(eGeoFormat fmt, size_t numvals)
{
    gfmt_ = fmt;

    if (fmt == eGeoNCols)
        nValCols(numvals);

    //-- change format for each point (nothing happens if there are none)
    for (int idx = 0; idx < count(); ++idx)
        pts_[idx].format(fmt, numvals);
}

//_____________________________________________________________________
// Load the geopoints file at 'path'. Points already held from a different
// path are unloaded first. Returns the result of load().
bool MvGeoPoints::load(const char* path)
{
    const bool otherFile = (path_ != path);
    const bool haveData  = (count_ > 0);

    if (otherFile && haveData)
        unload();  //-- unload if different data exist

    path_ = path;
    return load();
}

//_____________________________________________________________________
// Load the file named by path_, unless points are already loaded (in which
// case nothing is read and true is returned). Returns false if the file
// cannot be opened, otherwise the result of the stream-based load().
// NOTE(review): nmax is not forwarded to the stream-based load(); confirm
// whether that is intended.
bool MvGeoPoints::load(const int nmax)
{
    if (count_ != 0)
        return true;  // already loaded

    ifstream f(path_.c_str());
    if (f)
        return load(f);

    marslog(LOG_EROR, "Could not open geopoints file: %s", path_.c_str());
    return false;
}

void MvGeoPoints::addColName(std::string name, bool markStnIdAsUsed, bool addToFront)
{
    if (addToFront)
        colInfo_.colNames_.insert(colInfo_.colNames_.begin(), name);
    else
        colInfo_.colNames_.push_back(name);
    eGeoColType colType = colTypeFromName(name);
    addColType(colType, addToFront);
    if (markStnIdAsUsed && (colType == eGeoColStnId))
        hasStnIds(true);
}


// Add a column type to the front or to the back of the column type list.
void MvGeoPoints::addColType(eGeoColType t, bool addToFront)
{
    auto& types = colInfo_.colTypes_;
    types.insert(addToFront ? types.begin() : types.end(), t);
}



//_____________________________________________________________________
// The line should be something like this:
// stnid   lat   time  long    date
// and can contain, optionally, the names of the value columns, e.g.
// stnid   lat   time  long    date  temperature  ozone risk_factor
// returns false if there was an error in parsing the line
bool MvGeoPoints::parseColumnNames(char* line)
{
    // populate colNames_, ncols_ and nvalcols_

    // tokenise into a list of strings
    string sbuf(line);
    vector<string> sv;
    Tokenizer parse(" \t");
    parse(sbuf, sv);

    // for each string on the line
    colInfo_.ncoordcols_ = 0;
    bool valcols         = false;  // co-ordinate cols first, then value cols
    clearColNames();
    clearColTypes();

    for (size_t i = 0; i < sv.size(); i++) {
        std::string& name = sv[i];
        addColName(name, true);

        // is this a standard co-ordinate column name?
        eGeoColType colType = colTypeFromName(name, true);
        if (colTypeIsCoord(colType)) {
            if (valcols) {
                marslog(LOG_EROR, "Error parsing geopoints #COLUMNS line: all co-ordinate columns must come before the value columns - %s", name.c_str());
                return false;
            }
            colInfo_.ncoordcols_++;
        }
        else  // no, it must be a user-defined value name
        {
            valcols = true;
        }
    }


    // check that the essential columns exist
    if (std::find(colInfo_.colTypes_.begin(), colInfo_.colTypes_.end(), eGeoColLat) == colInfo_.colTypes_.end() ||
        std::find(colInfo_.colTypes_.begin(), colInfo_.colTypes_.end(), eGeoColLon) == colInfo_.colTypes_.end()) {
        marslog(LOG_EROR, "NCOLS-based geopoints must contain latitude and longitude columns");
        return false;
    }

    return true;
}

//_____________________________________________________________________
// Count how many value columns there are on the first data line
// - we can't just take the number of value header strings, because we
// do not require all value columns to have names
int MvGeoPoints::countValueColumns(char* line, int numCoordCols)
{
    // tokenise into a list of strings
    string sbuf(line);
    vector<string> sv;
    Tokenizer parse(" \t");
    parse(sbuf, sv);
    return (int)sv.size() - numCoordCols;
}


//_____________________________________________________________________
// If there are more columns than column names, give the unnamed ones
// generated names: _v1, _v2, ...
void MvGeoPoints::fillValueColumnNames()
{
    const int numUnnamedCols = totalcols() - colNames().size();

    // first unnamed col is _v1, next is _v2
    for (int suffix = 1; suffix <= numUnnamedCols; ++suffix)
        addColName("_v" + std::to_string(suffix));
}


// For NCOLS format only: if there is no station id column yet, add one to
// the front of the column list (without marking station ids as used) and
// count it as a co-ordinate column.
void MvGeoPoints::ensureNColsHasStnIds()
{
    if (gfmt_ != eGeoNCols)
        return;

    const auto& types = colInfo_.colTypes_;
    if (std::find(types.begin(), types.end(), eGeoColStnId) != types.end())
        return;  // already there

    addColName("stnid", false, true);
    colInfo_.ncoordcols_++;
}


//_____________________________________________________________________
bool MvGeoPoints::load(ifstream& f, const int nmax)
{
    char line[10240];
    int n        = 0;
    int numPts   = 0;
    streampos sp = f.tellg();


    if (nmax == 0) {
        //-- first count the lines
        while (f.getline(line, sizeof(line)))
            numPts++;
    }
    else
        numPts = nmax;

    unload();

    // resizing strategy: if we resize to the full size here, we'll end up calling the default
    // constructor for every geopoint, and then over-writing when we read from disk; unfortunately
    // we can't just reserve the memory, because that causes problems when we overwrite; but what
    // we can do is read the first one from disk, then resize the vector with copies of that one.
    // in this way, we can avoid destroying and reconstructing the values_ vector for each point
    pts_.resize(1);

    f.clear();
    f.seekg(sp);
    //f.seekg(0, ios::beg);

    gfmt_ = eGeoTraditional;

    bool metadata = false;
    bool db_info  = false;
    bool db_query = false;
    bool colnames = false;

    while (f.getline(line, sizeof(line))) {
        if (strncmp(line, "#DATA", 5) == 0) {
            break;
        }
        else if (strncmp(line, "#FORMAT ", 8) == 0) {
            const char* fp = line + 7;
            while (fp && *fp == ' ')
                ++fp;

            if (strncmp(fp, "POLAR_VECTOR", 12) == 0) {
                gfmt_ = eGeoVectorPolar;  //-- polar vector extension
            }
            else if (strncmp(fp, "XY_VECTOR", 9) == 0) {
                gfmt_ = eGeoVectorXY;  //-- cartesian vector extension
            }
            else if (strncmp(fp, "XYV", 3) == 0) {
                gfmt_ = eGeoXYV;  //-- "French" extension
            }
            else if (strncmp(fp, "LLV", 3) == 0) {
                gfmt_ = eGeoXYV;  //-- old name for XYV
            }
            else if (strncmp(fp, "NCOLS", 3) == 0) {
                gfmt_ = eGeoNCols;  //-- flexible format with unlimited columns for values
            }
            else {
                marslog(LOG_EROR, "Unknown geopoints format: %s", fp);
            }

            // Set format info
            this->setFormat();
        }

        else if (strncmp(line, "#COLUMNS", 7) == 0) {
            colnames = true;  // the column names will be on the next line
        }

        else if (colnames) {
            if (!parseColumnNames(line)) {
                marslog(LOG_EROR, "Error parsing geopoints column names");
                return false;
            }
            colnames = false;
        }


        else if (strncmp(line, "#METADATA", 9) == 0)  // start of meta-data
        {
            metadata = true;
            db_info  = false;
        }

        else if (metadata == true)  // within the meta-data block
        {
            string sbuf(line);
            vector<string> sv;
            Tokenizer parse("=");
            parse(sbuf, sv);
            if (sv.size() == 2)
                metadata_[sv[0]] = sv[1];  // store this line of metadata
        }


        //Information about the database, query etc. that
        //generated the geopoints file
        else if (strncmp(line, "#DB_INFO ", 8) == 0) {
            db_info  = true;
            metadata = false;
        }

        else if (db_info == true && strstr(line, "DB_SYSTEM:") != 0) {
            string sbuf(line);
            string::size_type pos = sbuf.find("DB_SYSTEM:");
            sbuf                  = sbuf.substr(pos + 10);
            dbSystem_             = sbuf;
        }

        else if (db_info == true && strstr(line, "DB_COLUMN:") != 0) {
            string sbuf(line);
            string::size_type pos = sbuf.find("DB_COLUMN:");
            sbuf                  = sbuf.substr(pos + 10);
            vector<string> sv;

            Tokenizer parse(";");
            parse(sbuf, sv);

            if (gfmt_ == eGeoTraditional && sv.size() == 6) {
                dbColumn_["lat"]   = sv[0];
                dbColumn_["lon"]   = sv[1];
                dbColumn_["level"] = sv[2];
                dbColumn_["date"]  = sv[3];
                dbColumn_["time"]  = sv[4];
                dbColumn_["value"] = sv[5];
            }
            else if (gfmt_ == eGeoXYV && sv.size() == 3) {
                dbColumn_["lon"]   = sv[0];
                dbColumn_["lat"]   = sv[1];
                dbColumn_["value"] = sv[2];
            }
            else if ((gfmt_ == eGeoVectorPolar || gfmt_ == eGeoVectorXY) && sv.size() == 7) {
                dbColumn_["lat"]    = sv[0];
                dbColumn_["lon"]    = sv[1];
                dbColumn_["level"]  = sv[2];
                dbColumn_["date"]   = sv[3];
                dbColumn_["time"]   = sv[4];
                dbColumn_["value"]  = sv[5];
                dbColumn_["value2"] = sv[6];
            }
        }

        else if (db_info == true && strstr(line, "DB_COLUMN_ALIAS:") != 0) {
            string sbuf(line);
            string::size_type pos = sbuf.find("DB_COLUMN_ALIAS:");
            sbuf                  = sbuf.substr(pos + 16);

            vector<string> sv;
            Tokenizer parse(";");
            parse(sbuf, sv);

            if (gfmt_ == eGeoTraditional && sv.size() == 6) {
                dbColumnAlias_["lat"]   = sv[0];
                dbColumnAlias_["lon"]   = sv[1];
                dbColumnAlias_["level"] = sv[2];
                dbColumnAlias_["date"]  = sv[3];
                dbColumnAlias_["time"]  = sv[4];
                dbColumnAlias_["value"] = sv[5];
            }
            else if (gfmt_ == eGeoXYV && sv.size() == 3) {
                dbColumnAlias_["lon"]   = sv[0];
                dbColumnAlias_["lat"]   = sv[1];
                dbColumnAlias_["value"] = sv[2];
            }
            else if ((gfmt_ == eGeoVectorPolar || gfmt_ == eGeoVectorXY) && sv.size() == 7) {
                dbColumnAlias_["lat"]    = sv[0];
                dbColumnAlias_["lon"]    = sv[1];
                dbColumnAlias_["level"]  = sv[2];
                dbColumnAlias_["date"]   = sv[3];
                dbColumnAlias_["time"]   = sv[4];
                dbColumnAlias_["value"]  = sv[5];
                dbColumnAlias_["value2"] = sv[6];
            }
        }

        else if (db_info == true && strstr(line, "DB_PATH:") != 0) {
            string sbuf(line);
            string::size_type pos = sbuf.find("DB_PATH:");
            sbuf                  = sbuf.substr(pos + 8);
            dbPath_               = sbuf;
        }

        else if (db_info == true && strstr(line, "DB_QUERY_BEGIN") != 0) {
            db_query = true;
        }

        else if (db_info == true && db_query == true) {
            dbQuery_.push_back(line);
        }
        else if (strstr(line, "DB_QUERY_END") != 0) {
            db_query = true;
        }
    }

    db_info  = false;
    db_query = false;
    metadata = false;

    // Read data
    if (nmax == 0) {
        bool inSameGpts = true;  // used when in a geopointset file
        while (f.getline(line, sizeof(line)) && inSameGpts) {
            if ((*line != '#') && (strlen(line) > 4)) {
                if (n == 0 && gfmt_ == eGeoNCols) {
                    // first time only - count the number of values
                    nValCols(-1);
                    nValCols(countValueColumns(line, colInfo_.ncoordcols_));
                    fillValueColumnNames();
                }

                if (!pts_[n].extract(line, gfmt_, this)) {
                    marslog(LOG_EROR, "Error parsing geopoints file %s", path_.c_str());
                    return false;
                }
                if (n == 0) {
                    MvGeoP1 firstgp1 = pts_[0];  // resize using the first as a template - see comments at top of function
                    pts_.resize(numPts, firstgp1);
                }
                n++;
            }
            else if (!strncmp(line, "#GEO", 4))  // start of new geopoints file
            {
                inSameGpts = false;
            }
        }
    }
    else {
        for (int i = 0; i < nmax; i++) {
            if (!f.getline(line, sizeof(line))) {
                marslog(LOG_EROR, "Geopoints file has less data than expected: %s", path_.c_str());
                return false;
            }

            if ((*line != '#') && (strlen(line) > 4)) {
                if (!pts_[n].extract(line, gfmt_, this)) {
                    marslog(LOG_EROR, "Error parsing geopoints file %s", path_.c_str());
                    return false;
                }
                n++;
            }
        }
    }

    ensureNColsHasStnIds();
    count(n);

    return true;
}

//_____________________________________________________________________
void MvGeoPoints::unload()
{
    pts_.clear();
    vector<MvGeoP1>().swap(pts_);  // ensure memory is released
    count_ = 0;
}

//_____________________________________________________________________
bool MvGeoPoints::write(const char* filename)
{
    int nPreviousPrecision;

    ofstream fout(filename);
    if (!fout) {
        marslog(LOG_EROR, "Unable to open geopoints file for writing: %s", filename);
        return false;
    }

    fout << "#GEO\n";

    switch (gfmt_) {
        case eGeoVectorPolar:
            fout << "#FORMAT POLAR_VECTOR\n"
                 << "# lat\tlon\theight\tdate\t\ttime\tspeed\tdirection\n";
            break;

        case eGeoVectorXY:
            fout << "#FORMAT XY_VECTOR\n"
                 << "# lat\tlon\theight\tdate\t\ttime\tu\tv\n";
            break;

        case eGeoXYV:
            fout << "#FORMAT XYV\n"
                 << "# lon-x\tlat-y\tvalue\n";
            break;

        case eGeoNCols:
            fout << "#FORMAT NCOLS\n"
                 << "#COLUMNS\n";
            for (size_t i = 0; i < colInfo_.colNames_.size(); i++) {
                // don't write out the stnid column if there are no station ids
                if (colInfo_.colTypes_[i] == eGeoColStnId && !colInfo_.hasStnIds_)
                    continue;
                fout << colInfo_.colNames_[i].c_str() << "\t";
            }
            fout << "\n";
            break;

        default:
            //-- this is for both eGeoTraditional and eGeoString
            //-- no "#FORMAT" line is needed
            fout << "# lat\tlon\theight\tdate\t\ttime\tvalue\n";
            break;
    }


    // Insert a line that will tell the user which value represents missing points.
    // Take care not to disturb the floating-point precision, but we need to use
    // the correct one that will actually be used in the file.
    // Note that the storing and restoring of the original precision value
    // in the output stream is probably unnecessary, but it is done just once
    // per geopoints file and so should be insignificant and allows the
    // implementation of the << operator on a single geopoint to be changed
    // without unexpected side-effects.

    nPreviousPrecision = fout.precision();
    fout.precision(VALUE_PRECISION);
    fout << "# Missing values represented by " << GEOPOINTS_MISSING_VALUE
         << " (not user-changeable)" << endl;
    fout.precision(nPreviousPrecision);


    // metadata, if there is any

    const metadata_t md = metadataConst();
    if (!md.empty()) {
        fout << "#METADATA" << endl;
        metadata_t::const_iterator it = md.begin();
        while (it != md.end()) {
            std::string key = it->first;
            MvVariant val   = it->second;
            fout << key << "=" << val.toString() << endl;
            it++;
        }
    }

    // start the data section

    fout << "#DATA" << endl;

    MvGeop1Writer gpw(pts_[0], colInfo_);

    for (int p = 0; p < count_; ++p) {
        gpw.gp_ = pts_[p];
        fout << gpw
             << endl;
    }

    return true;
}

//_____________________________________________________________________
MvGeoP1
MvGeoPoints::nearestPoint(double lat_y, double lon_x) const
{
    // Returns a copy of the point whose location is closest (according to
    // MvLocation::distanceInMeters) to the given position. With no points
    // available a default-constructed MvGeoP1 is returned. When several
    // points are equally close, the one with the lowest index wins.
    if (count_ == 0)
        return MvGeoP1();

    MvLocation target(lat_y, lon_x);

    //-- the first point is the initial candidate
    MvLocation firstLoc(pts_[0].lat_y(), pts_[0].lon_x());
    double bestDist = target.distanceInMeters(firstLoc);
    long bestIndex  = 0;

    //-- every remaining point may replace it, but only if strictly closer
    for (int idx = 1; idx < count_; ++idx) {
        MvLocation candidateLoc(pts_[idx].lat_y(), pts_[idx].lon_x());
        const double candidateDist = target.distanceInMeters(candidateLoc);

        if (!(candidateDist < bestDist))
            continue;

        bestDist  = candidateDist;
        bestIndex = idx;
    }

    return pts_[bestIndex];
}


//_____________________________________________________________________
// MvGeoPoints::indexOfFirstValidPoint
// Returns the index of the first geopoint that is valid in the set.
// If none are valid, then -1 is returned.
// Note that this function only tests the value selected by the
// argument c (via value_missing(c)) in each geopoint.

long MvGeoPoints::indexOfFirstValidPoint(size_t c) const
{
    // -1 means "nothing found (yet)"; the scan stops as soon as a point
    // reports that its value for c is not missing
    long firstValid = -1;

    for (int idx = 0; idx < count_ && firstValid < 0; ++idx) {
        if (pts_[idx].value_missing(c))
            continue;

        firstValid = idx;
    }

    return firstValid;
}

//_____________________________________________________________________
// MvGeoPoints::sort()
// Sorts points geographically - from North to South, West to East
void MvGeoPoints::sort()
{
    //-- Strategy: distribute the points into one-degree latitude bands
    //-- (band 0 = northernmost), sort each band with list::sort() (which
    //-- uses operator< of MvGeoP1) and concatenate the bands. Afterwards
    //-- pts_ holds exactly count() points.

    if (count() < 2)  //-- no need to sort if empty or only one point
        return;

    //-- to make sort faster for huge files, copy input geopoints into
    //-- several latitude band lists;
    //-- here we define the width and the number of these latitude bands
    const double cLatBandSize = 1.0;
    const int cLatBandCount   = static_cast<int>(180.0 / cLatBandSize) + 1;

    //-- one (initially empty) STL list per band, plus one extra tail band
    vector<list<MvGeoP1> > LatListVec(cLatBandCount + 1);

    //-- first coarse distribution into STL lists that are stored in STL vector
    for (int s = 0; s < count(); ++s) {
        //-- The band position is clamped while it is still a double:
        //-- converting an out-of-range or NaN double to int is undefined
        //-- behaviour, and latitudes are not guaranteed to be within +-90
        //-- (e.g. they may hold GEOPOINTS_MISSING_VALUE).
        const double bandPos = (pts_[s].lat_y() + 90.5) / cLatBandSize;

        int band;
        if (!(bandPos < cLatBandCount))  //-- too far north, or NaN => head
            band = 0;
        else if (bandPos <= 0.0)  //-- too far south => tail
            band = cLatBandCount;
        else
            band = cLatBandCount - static_cast<int>(bandPos);

        LatListVec[band].push_back(pts_[s]);
    }

    vector<MvGeoP1> work;
    work.reserve(count_);  // reserve so that we don't call default constructor unnecessarily

    //-- sort each latitude band STL list and copy to output
    for (int vecList = 0; vecList < cLatBandCount + 1; ++vecList) {
        //-- work on the band in place - no need to copy the whole list first
        list<MvGeoP1>& curList = LatListVec[vecList];
        if (curList.empty())
            continue;

        curList.sort();
        work.insert(work.end(), curList.begin(), curList.end());

        curList.clear();  //-- release the band's memory as early as possible
    }

    pts_.swap(work);  // put our result into pts_
}

//_____________________________________________________________________
void MvGeoPoints::removeDuplicates()
{
    if (count() > 0) {
        sort();

        vector<MvGeoP1> work;
        work.reserve(count_);      // reserve so that we don't call default constructor unnecessarily
        MvGeoP1* curr = &pts_[0];  //-- store now in case there is just 1 point
        MvGeoP1* prev = &pts_[0];  //-- store the first point

        for (int p = 1; p < count_; ++p)  //-- start from the second point
        {
            curr = &pts_[p];        //-- current geopoint
            if (!(*curr == *prev))  //-- no != operator defined
            {
                work.push_back(*prev);  //-- points non-equal => copy prev
            }
            prev = curr;  //-- store current as previous
        }

        work.push_back(*curr);  //-- last point cannot be duplicate

        pts_.swap(work);  //-- swap the temp result with the pts_

        count_ = pts_.size();  //-- adjust current point count

        // the following line removed at the request of Mark Rodwell
        // marslog(LOG_INFO, "MvGeoPoints::removeDuplicates: %d duplicates removed", iRem);
    }
}
//_____________________________________________________________________
void MvGeoPoints::offset(double latOffset, double lonOffset)
{
    if (count() > 0) {
        for (int p = 0; p < count_; ++p) {
            MvGeoP1& pt = pts_[p];
            double actualLatOffset = (pt.lat_y() == GEOPOINTS_MISSING_VALUE) ? 0.0 : latOffset;
            double actualLonOffset = (pt.lon_x() == GEOPOINTS_MISSING_VALUE) ? 0.0 : lonOffset;
            pts_[p].location(pt.lat_y() + actualLatOffset, pt.lon_x() + actualLonOffset);
        }
    }
}

int MvGeoPoints::indexOfNamedValue(std::string& name)
{
    // Looks 'name' up in the list of column names and converts its position
    // into an index into the values array. Returns -1 if there is no column
    // with that name.
    size_t pos = 0;
    while (pos < colInfo_.colNames_.size() && !(colInfo_.colNames_[pos] == name))
        ++pos;

    if (pos >= colInfo_.colNames_.size())
        return -1;

    // e.g. with 4 co-ordinate columns the first value column is column 4
    // (0-based), which is element 0 of the values array - hence the number
    // of co-ordinate columns is always subtracted from the position
    return pos - colInfo_.ncoordcols_;
}

vector<string> MvGeoPoints::valueColNames() const
{
    // Collects, in column order, the names of all columns whose type is
    // not classified as a co-ordinate by colTypeIsCoord()
    vector<string> names;

    const size_t numCols = colInfo_.colTypes_.size();
    for (size_t col = 0; col != numCols; ++col) {
        eGeoColType type = colInfo_.colTypes_[col];
        if (colTypeIsCoord(type))
            continue;

        names.push_back(colName(col));
    }

    return names;
}

vector<string> MvGeoPoints::usedColNames() const
{
    // simply hands back whatever colNames() reports
    vector<string> names(colNames());
    return names;
}

void MvGeoPoints::setFormat()
{
    // Clean the structure
    if (colInfo_.colNames_.size())
        clearColNames();

    if (colInfo_.colTypes_.size())
        colInfo_.colTypes_.clear();

    colInfo_.ncoordcols_         = 5;  // should be 5, but 4 will give us numvals=2 for backwards compatibility
    colInfo_.nvalcolsforcompute_ = 1;  // all formats except xy_vector and ncols only operate on one column
    hasStnIds(false);                  // assume they're not there unless we read them or the user sets them


    if (gfmt_ == eGeoTraditional || gfmt_ == eGeoString) {
        sgfmt_          = "Traditional";
        colInfo_.ncols_ = 6;
        nValCols(1);
        colInfo_.colNames_.reserve(colInfo_.ncols_);
        addColName("latitude");
        addColName("longitude");
        addColName("level");
        addColName("date");
        addColName("time");
        addColName("value");
    }
    else if (gfmt_ == eGeoXYV) {
        sgfmt_          = "XYV";
        colInfo_.ncols_ = 3;
        nValCols(1);
        colInfo_.ncoordcols_ = 2;
        colInfo_.colNames_.reserve(colInfo_.ncols_);
        addColName("longitude");
        addColName("latitude");
        addColName("value");
    }
    else if (gfmt_ == eGeoVectorPolar) {
        sgfmt_          = "Polar_Vector";
        colInfo_.ncols_ = 7;
        nValCols(2);
        colInfo_.nvalcolsforcompute_ = 1;
        colInfo_.colNames_.reserve(colInfo_.ncols_);
        addColName("latitude");
        addColName("longitude");
        addColName("level");
        addColName("date");
        addColName("time");
        addColName("magnitude");
        addColName("angle");
    }
    else if (gfmt_ == eGeoVectorXY) {
        sgfmt_          = "XY_Vector";
        colInfo_.ncols_ = 7;
        nValCols(2);
        colInfo_.nvalcolsforcompute_ = 2;
        colInfo_.colNames_.reserve(colInfo_.ncols_);
        addColName("latitude");
        addColName("longitude");
        addColName("level");
        addColName("date");
        addColName("time");
        addColName("x-comp");
        addColName("y-comp");
    }

    else if (gfmt_ == eGeoNCols) {
        sgfmt_          = "NCols";
        colInfo_.ncols_ = 7;
        nValCols(0);
        colInfo_.ncoordcols_ = 6;
        colInfo_.colNames_.reserve(colInfo_.ncols_);
        addColName("stnid");
        addColName("latitude");
        addColName("longitude");
        addColName("level");
        addColName("date");
        addColName("time");
    }


    return;
}

string MvGeoPoints::value(long row, int col, int& type)
{
    // Delegates to the point in the given row; 'type' is passed on by
    // reference to MvGeoP1::column(). No range check is done on 'row'.
    MvGeoP1& point = pts_[row];
    return point.column(col, colInfo_, type);
}
