// Copyright 2005, 2006 - Morten Nielsen (www.iter.dk)
//
// This file is part of SharpMap.
// SharpMap is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// SharpMap is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public License
// along with SharpMap; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
// Note:
// Good stuff on DBase format: http://www.clicketyclick.dk/databases/xbase/format/
using System;
using System.Data;
using System.IO;
using GeoAPI.Extensions.Feature;
using NetTopologySuite.Extensions.Features;
namespace SharpMap.Data.Providers
{
public class ShapeFileFeature : Feature
{
internal DbaseReader AttributeReader;
private IFeatureAttributeCollection attributes;
public override IFeatureAttributeCollection Attributes
{
get
{
if (attributes == null)
{
attributes = new DictionaryFeatureAttributeCollection();
// TODO: speed-up this (memory-mapped file)
AttributeReader.StartReading((uint)Oid);
for (int i = 0; i < AttributeReader.DbaseColumns.Length; i++)
{
DbaseReader.DbaseField column = AttributeReader.DbaseColumns[i];
attributes[column.ColumnName] = AttributeReader.ReadDbfValue(column);
}
}
return attributes;
}
set { attributes = value; }
}
public int Oid { get; set; }
protected bool Equals(ShapeFileFeature other)
{
return Geometry.EqualsExact(other.Geometry);
}
public override bool Equals(object obj)
{
if (ReferenceEquals(null, obj)) return false;
if (ReferenceEquals(this, obj)) return true;
if (obj.GetType() != this.GetType()) return false;
return Equals((ShapeFileFeature) obj);
}
public override int GetHashCode()
{
int result = 17;
result = 37 * result + Geometry.GetHashCode();
return result;
}
}
internal class DbaseReader : IDisposable
{
internal struct DbaseField
{
public string ColumnName;
public Type DataType;
public int Address;
public int Length;
public int Decimals;
}
private DateTime _lastUpdate;
private int _NumberOfRecords;
private Int16 _HeaderLength;
private Int16 _RecordLength;
private string _filename;
internal DbaseField[] DbaseColumns;
private FileStream fs;
private BinaryReader br;
private bool HeaderIsParsed;
public DbaseReader(string filename)
{
if (!File.Exists(filename))
throw new FileNotFoundException(String.Format("Could not find file \"{0}\"", filename));
_filename = filename;
HeaderIsParsed = false;
}
private bool _isOpen;
public bool IsOpen
{
get { return _isOpen; }
set { _isOpen = value; }
}
public void Open()
{
fs = new FileStream(_filename, FileMode.Open, FileAccess.Read);
br = new BinaryReader(fs);
_isOpen = true;
if (!HeaderIsParsed) //Don't read the header if it's already parsed
ParseDbfHeader(_filename);
}
public void Close()
{
br.Close();
fs.Close();
_isOpen = false;
}
public void Dispose()
{
if(_isOpen)
this.Close();
br = null;
fs = null;
}
// Binary Tree not working yet on Mono
// see bug: http://bugzilla.ximian.com/show_bug.cgi?id=78502
#if !MONO
///
/// Indexes a DBF column in a binary tree [NOT COMPLETE]
///
/// datatype to be indexed
/// Column to index
///
public SharpMap.Utilities.Indexing.BinaryTree CreateDbfIndex(int ColumnId) where T:IComparable
{
SharpMap.Utilities.Indexing.BinaryTree tree = new SharpMap.Utilities.Indexing.BinaryTree();
for (uint i = 0; i < ((this._NumberOfRecords>10000)?10000:this._NumberOfRecords); i++)
tree.Add(new SharpMap.Utilities.Indexing.BinaryTree.ItemValue((T)GetValue(i, ColumnId), i));
return tree;
}
#endif
/*
///
/// Creates an index on the columns for faster searching [EXPERIMENTAL - Requires Lucene dependencies]
///
///
public string CreateLuceneIndex()
{
string dir = this._filename + ".idx";
if (!System.IO.Directory.Exists(dir))
System.IO.Directory.CreateDirectory(dir);
Lucene.Net.Index.IndexWriter iw = new Lucene.Net.Index.IndexWriter(dir,new Lucene.Net.Analysis.Standard.StandardAnalyzer(),true);
for (uint i = 0; i < this._NumberOfRecords; i++)
{
FeatureDataRow dr = GetFeature(i,this.NewTable);
Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
// Add the object-id as a field, so that index can be maintained.
// This field is not stored with document, it is indexed, but it is not
// tokenized prior to indexing.
//doc.Add(Lucene.Net.Documents.Field.UnIndexed("SharpMap_oid", i.ToString())); //Add OID index
foreach(System.Data.DataColumn col in dr.Table.Columns) //Add and index values from DBF
{
if(col.DataType.Equals(typeof(string)))
// Add the contents as a valued Text field so it will get tokenized and indexed.
doc.Add(Lucene.Net.Documents.Field.UnStored(col.ColumnName,(string)dr[col]));
else
doc.Add(Lucene.Net.Documents.Field.UnStored(col.ColumnName, dr[col].ToString()));
}
iw.AddDocument(doc);
}
iw.Optimize();
iw.Close();
return this._filename + ".idx";
}
*/
///
/// Gets the date this file was last updated.
///
public DateTime LastUpdate
{
get { return _lastUpdate; }
}
private void ParseDbfHeader(string filename)
{
if (br.ReadByte() != 0x03)
throw new NotSupportedException("Unsupported DBF Type");
_lastUpdate = new DateTime((int)br.ReadByte() + 1900, (int)br.ReadByte(), (int)br.ReadByte()); //Read the last update date
_NumberOfRecords = br.ReadInt32(); // read number of records.
_HeaderLength = br.ReadInt16(); // read length of header structure.
_RecordLength = br.ReadInt16(); // read length of a record
fs.Seek(29, SeekOrigin.Begin); //Seek to encoding flag
_FileEncoding = GetDbaseLanguageDriver(br.ReadByte()); //Read and parse Language driver
fs.Seek(32, SeekOrigin.Begin); //Move past the reserved bytes
int NumberOfColumns = (_HeaderLength - 31) / 32; // calculate the number of DataColumns in the header
DbaseColumns = new DbaseField[NumberOfColumns];
for (int i = 0; i < DbaseColumns.Length;i++)
{
DbaseColumns[i] = new DbaseField();
DbaseColumns[i].ColumnName = System.Text.Encoding.UTF7.GetString((br.ReadBytes(11))).Replace("\0", "").Trim();
char fieldtype = br.ReadChar();
switch (fieldtype)
{
case 'L':
DbaseColumns[i].DataType = typeof(bool);
break;
case 'C':
DbaseColumns[i].DataType = typeof(string);
break;
case 'D':
DbaseColumns[i].DataType = typeof(DateTime);
break;
case 'N':
DbaseColumns[i].DataType = typeof(double);
break;
case 'F':
DbaseColumns[i].DataType = typeof(float);
break;
case 'B':
DbaseColumns[i].DataType = typeof(byte[]);
break;
default:
throw (new NotSupportedException("Invalid or unknown DBase field type '" + fieldtype +
"' in column '" + DbaseColumns[i].ColumnName + "'"));
}
DbaseColumns[i].Address = br.ReadInt32();
int Length = (int)br.ReadByte();
if (Length < 0) Length = Length + 256;
DbaseColumns[i].Length = Length;
DbaseColumns[i].Decimals = (int)br.ReadByte();
//If the double-type doesn't have any decimals, make the type an integer
if (DbaseColumns[i].Decimals == 0 && DbaseColumns[i].DataType == typeof(double))
if (DbaseColumns[i].Length <= 2)
DbaseColumns[i].DataType = typeof(Int16);
else if(DbaseColumns[i].Length<=4)
DbaseColumns[i].DataType = typeof(Int32);
else
DbaseColumns[i].DataType = typeof(Int64);
fs.Seek(fs.Position + 14, 0);
}
HeaderIsParsed = true;
CreateBaseTable();
}
private System.Text.Encoding GetDbaseLanguageDriver(byte dbasecode)
{
switch (dbasecode)
{
case 0x01: return System.Text.Encoding.GetEncoding(437); //DOS USA code page 437
case 0x02: return System.Text.Encoding.GetEncoding(850); // DOS Multilingual code page 850
case 0x03: return System.Text.Encoding.GetEncoding(1252); // Windows ANSI code page 1252
case 0x04: return System.Text.Encoding.GetEncoding(10000); // Standard Macintosh
case 0x08: return System.Text.Encoding.GetEncoding(865); // Danish OEM
case 0x09: return System.Text.Encoding.GetEncoding(437); // Dutch OEM
case 0x0A: return System.Text.Encoding.GetEncoding(850); // Dutch OEM Secondary codepage
case 0x0B: return System.Text.Encoding.GetEncoding(437); // Finnish OEM
case 0x0D: return System.Text.Encoding.GetEncoding(437); // French OEM
case 0x0E: return System.Text.Encoding.GetEncoding(850); // French OEM Secondary codepage
case 0x0F: return System.Text.Encoding.GetEncoding(437); // German OEM
case 0x10: return System.Text.Encoding.GetEncoding(850); // German OEM Secondary codepage
case 0x11: return System.Text.Encoding.GetEncoding(437); // Italian OEM
case 0x12: return System.Text.Encoding.GetEncoding(850); // Italian OEM Secondary codepage
case 0x13: return System.Text.Encoding.GetEncoding(932); // Japanese Shift-JIS
case 0x14: return System.Text.Encoding.GetEncoding(850); // Spanish OEM secondary codepage
case 0x15: return System.Text.Encoding.GetEncoding(437); // Swedish OEM
case 0x16: return System.Text.Encoding.GetEncoding(850); // Swedish OEM secondary codepage
case 0x17: return System.Text.Encoding.GetEncoding(865); // Norwegian OEM
case 0x18: return System.Text.Encoding.GetEncoding(437); // Spanish OEM
case 0x19: return System.Text.Encoding.GetEncoding(437); // English OEM (Britain)
case 0x1A: return System.Text.Encoding.GetEncoding(850); // English OEM (Britain) secondary codepage
case 0x1B: return System.Text.Encoding.GetEncoding(437); // English OEM (U.S.)
case 0x1C: return System.Text.Encoding.GetEncoding(863); // French OEM (Canada)
case 0x1D: return System.Text.Encoding.GetEncoding(850); // French OEM secondary codepage
case 0x1F: return System.Text.Encoding.GetEncoding(852); // Czech OEM
case 0x22: return System.Text.Encoding.GetEncoding(852); // Hungarian OEM
case 0x23: return System.Text.Encoding.GetEncoding(852); // Polish OEM
case 0x24: return System.Text.Encoding.GetEncoding(860); // Portuguese OEM
case 0x25: return System.Text.Encoding.GetEncoding(850); // Portuguese OEM secondary codepage
case 0x26: return System.Text.Encoding.GetEncoding(866); // Russian OEM
case 0x37: return System.Text.Encoding.GetEncoding(850); // English OEM (U.S.) secondary codepage
case 0x40: return System.Text.Encoding.GetEncoding(852); // Romanian OEM
case 0x4D: return System.Text.Encoding.GetEncoding(936); // Chinese GBK (PRC)
case 0x4E: return System.Text.Encoding.GetEncoding(949); // Korean (ANSI/OEM)
case 0x4F: return System.Text.Encoding.GetEncoding(950); // Chinese Big5 (Taiwan)
case 0x50: return System.Text.Encoding.GetEncoding(874); // Thai (ANSI/OEM)
case 0x57: return System.Text.Encoding.GetEncoding(1252); // ANSI
case 0x58: return System.Text.Encoding.GetEncoding(1252); // Western European ANSI
case 0x59: return System.Text.Encoding.GetEncoding(1252); // Spanish ANSI
case 0x64: return System.Text.Encoding.GetEncoding(852); // Eastern European MS–DOS
case 0x65: return System.Text.Encoding.GetEncoding(866); // Russian MS–DOS
case 0x66: return System.Text.Encoding.GetEncoding(865); // Nordic MS–DOS
case 0x67: return System.Text.Encoding.GetEncoding(861); // Icelandic MS–DOS
case 0x68: return System.Text.Encoding.GetEncoding(895); // Kamenicky (Czech) MS-DOS
case 0x69: return System.Text.Encoding.GetEncoding(620); // Mazovia (Polish) MS-DOS
case 0x6A: return System.Text.Encoding.GetEncoding(737); // Greek MS–DOS (437G)
case 0x6B: return System.Text.Encoding.GetEncoding(857); // Turkish MS–DOS
case 0x6C: return System.Text.Encoding.GetEncoding(863); // French–Canadian MS–DOS
case 0x78: return System.Text.Encoding.GetEncoding(950); // Taiwan Big 5
case 0x79: return System.Text.Encoding.GetEncoding(949); // Hangul (Wansung)
case 0x7A: return System.Text.Encoding.GetEncoding(936); // PRC GBK
case 0x7B: return System.Text.Encoding.GetEncoding(932); // Japanese Shift-JIS
case 0x7C: return System.Text.Encoding.GetEncoding(874); // Thai Windows/MS–DOS
case 0x7D: return System.Text.Encoding.GetEncoding(1255); // Hebrew Windows
case 0x7E: return System.Text.Encoding.GetEncoding(1256); // Arabic Windows
case 0x86: return System.Text.Encoding.GetEncoding(737); // Greek OEM
case 0x87: return System.Text.Encoding.GetEncoding(852); // Slovenian OEM
case 0x88: return System.Text.Encoding.GetEncoding(857); // Turkish OEM
case 0x96: return System.Text.Encoding.GetEncoding(10007); // Russian Macintosh
case 0x97: return System.Text.Encoding.GetEncoding(10029); // Eastern European Macintosh
case 0x98: return System.Text.Encoding.GetEncoding(10006); // Greek Macintosh
case 0xC8: return System.Text.Encoding.GetEncoding(1250); // Eastern European Windows
case 0xC9: return System.Text.Encoding.GetEncoding(1251); // Russian Windows
case 0xCA: return System.Text.Encoding.GetEncoding(1254); // Turkish Windows
case 0xCB: return System.Text.Encoding.GetEncoding(1253); // Greek Windows
case 0xCC: return System.Text.Encoding.GetEncoding(1257); // Baltic Windows
default:
return System.Text.Encoding.UTF7;
}
}
///
/// Returns a DataTable that describes the column metadata of the DBase file.
///
/// A DataTable that describes the column metadata.
public DataTable GetSchemaTable()
{
DataTable tab = new DataTable();
// all of common, non "base-table" fields implemented
tab.Columns.Add("ColumnName", typeof(System.String));
tab.Columns.Add("ColumnSize", typeof(Int32));
tab.Columns.Add("ColumnOrdinal", typeof(Int32));
tab.Columns.Add("NumericPrecision", typeof(Int16));
tab.Columns.Add("NumericScale", typeof(Int16));
tab.Columns.Add("DataType", typeof(System.Type));
tab.Columns.Add("AllowDBNull", typeof(bool));
tab.Columns.Add("IsReadOnly", typeof(bool));
tab.Columns.Add("IsUnique", typeof(bool));
tab.Columns.Add("IsRowVersion", typeof(bool));
tab.Columns.Add("IsKey", typeof(bool));
tab.Columns.Add("IsAutoIncrement", typeof(bool));
tab.Columns.Add("IsLong", typeof(bool));
foreach (DbaseField dbf in DbaseColumns)
tab.Columns.Add(dbf.ColumnName, dbf.DataType);
for (int i = 0; i < DbaseColumns.Length; i++)
{
DataRow r = tab.NewRow();
r["ColumnName"] = DbaseColumns[i].ColumnName;
r["ColumnSize"] = DbaseColumns[i].Length;
r["ColumnOrdinal"] = i;
r["NumericPrecision"] = DbaseColumns[i].Decimals;
r["NumericScale"] = 0;
r["DataType"] = DbaseColumns[i].DataType;
r["AllowDBNull"] = true;
r["IsReadOnly"] = true;
r["IsUnique"] = false;
r["IsRowVersion"] = false;
r["IsKey"] = false;
r["IsAutoIncrement"] = false;
r["IsLong"] = false;
// specializations, if ID is unique
//if (_ColumnNames[i] == "ID")
// r["IsUnique"] = true;
tab.Rows.Add(r);
}
return tab;
}
private SharpMap.Data.FeatureDataTable baseTable;
private void CreateBaseTable()
{
baseTable = new SharpMap.Data.FeatureDataTable();
foreach (DbaseField dbf in DbaseColumns)
{
baseTable.Columns.Add(dbf.ColumnName, dbf.DataType);
}
SetPrimaryKey(baseTable);
}
private void SetPrimaryKey(FeatureDataTable table)
{
const string primaryKeyColumnName = "OBJECTID";
if (baseTable.Columns.Contains(primaryKeyColumnName))
{
baseTable.PrimaryKey = new[] { baseTable.Columns[primaryKeyColumnName] };
}
else
{
var idColumn = new DataColumn("DSHELL_ADDED_" + primaryKeyColumnName, typeof(int))
{
AutoIncrement = true
};
baseTable.Columns.Add(idColumn);
baseTable.PrimaryKey = new[] { idColumn };
}
}
internal SharpMap.Data.FeatureDataTable NewTable
{
get { return baseTable.Clone(); }
}
internal object GetValue(uint oid, int colid)
{
if (!_isOpen)
throw (new ApplicationException("An attempt was made to read from a closed DBF file"));
if (oid >= _NumberOfRecords)
throw (new ArgumentException("Invalid DataRow requested at index " + oid.ToString()));
if (colid >= DbaseColumns.Length || colid < 0)
throw ((new ArgumentException("Column index out of range")));
fs.Seek(_HeaderLength + oid * _RecordLength, 0);
for (int i = 0; i < colid; i++)
br.BaseStream.Seek(DbaseColumns[i].Length,SeekOrigin.Current);
return ReadDbfValue(DbaseColumns[colid]);
}
private System.Text.Encoding _Encoding;
private System.Text.Encoding _FileEncoding;
///
/// Gets or sets the used for parsing strings from the DBase DBF file.
///
///
/// If the encoding type isn't set, the dbase driver will try to determine the correct .
///
public System.Text.Encoding Encoding
{
get { return _Encoding; }
set { _Encoding = value; }
}
///
/// Gets the feature at the specified Object ID
///
///
///
internal IFeature GetFeature(int oid)
{
if (!_isOpen)
{
Open();
}
if (oid >= _NumberOfRecords)
throw (new ArgumentException("Invalid DataRow requested at index " + oid.ToString()));
var feature = new ShapeFileFeature { AttributeReader = this, Oid = oid };
return feature;
}
internal object ReadDbfValue(DbaseField dbf)
{
switch (dbf.DataType.ToString())
{
case "System.String":
if(_Encoding==null)
return _FileEncoding.GetString(br.ReadBytes(dbf.Length)).Replace("\0", "").Trim();
else
return _Encoding.GetString(br.ReadBytes(dbf.Length)).Replace("\0", "").Trim();
case "System.Double":
string temp = System.Text.Encoding.UTF7.GetString(br.ReadBytes(dbf.Length)).Replace("\0", "").Trim();
double dbl = 0;
if(double.TryParse(temp, System.Globalization.NumberStyles.Float, SharpMap.Map.numberFormat_EnUS, out dbl))
return dbl;
else
return DBNull.Value;
case "System.Int16":
string temp16 = System.Text.Encoding.UTF7.GetString((br.ReadBytes(dbf.Length))).Replace("\0", "").Trim();
Int16 i16 = 0;
if (Int16.TryParse(temp16, System.Globalization.NumberStyles.Float, SharpMap.Map.numberFormat_EnUS, out i16))
return i16;
else
return DBNull.Value;
case "System.Int32":
string temp32 = System.Text.Encoding.UTF7.GetString((br.ReadBytes(dbf.Length))).Replace("\0", "").Trim();
Int32 i32 = 0;
if (Int32.TryParse(temp32, System.Globalization.NumberStyles.Float, SharpMap.Map.numberFormat_EnUS, out i32))
return i32;
else
return DBNull.Value;
case "System.Int64":
string temp64 = System.Text.Encoding.UTF7.GetString((br.ReadBytes(dbf.Length))).Replace("\0", "").Trim();
Int64 i64 = 0;
if (Int64.TryParse(temp64, System.Globalization.NumberStyles.Float, SharpMap.Map.numberFormat_EnUS, out i64))
return i64;
else
return DBNull.Value;
case "System.Single":
string temp4 = System.Text.Encoding.UTF8.GetString((br.ReadBytes(dbf.Length)));
float f = 0;
if (float.TryParse(temp4, System.Globalization.NumberStyles.Float, SharpMap.Map.numberFormat_EnUS, out f))
return f;
else
return DBNull.Value;
case "System.Boolean":
char tempChar = br.ReadChar();
return ((tempChar == 'T') || (tempChar == 't') || (tempChar == 'Y') || (tempChar == 'y'));
case "System.DateTime":
DateTime date;
// Mono has not yet implemented DateTime.TryParseExact
#if !MONO
if (DateTime.TryParseExact(System.Text.Encoding.UTF7.GetString((br.ReadBytes(8))),
"yyyyMMdd", SharpMap.Map.numberFormat_EnUS, System.Globalization.DateTimeStyles.None, out date))
return date;
else
return DBNull.Value;
#else
try
{
return date = DateTime.ParseExact ( System.Text.Encoding.UTF7.GetString((br.ReadBytes(8))),
"yyyyMMdd", SharpMap.Map.numberFormat_EnUS, System.Globalization.DateTimeStyles.None );
}
catch ( Exception e )
{
return DBNull.Value;
}
#endif
default:
throw (new NotSupportedException("Cannot parse DBase field '" + dbf.ColumnName + "' of type '" + dbf.DataType.ToString() + "'"));
}
}
public void StartReading(uint oid)
{
if (!IsOpen)
{
Open();
}
fs.Seek(_HeaderLength + oid * _RecordLength, 0);
if (br.ReadChar() == '*') //is record marked deleted?
{
// TODO: handle deleted features correctly
}
}
}
}