我目前有一个处理程序,该处理程序获取excel文件的文件路径和标签名,将文件处理为数据表,然后将表序列化为json字符串以返回。在尝试处理大文件之前,此方法一直有效,然后出现内存不足异常。
我在想,如果不先将所有内容加载到数据表中,而是直接加载到json字符串中,它将减少内存使用。但是,我一直找不到如何执行此操作的任何示例。
我可以直接从 OleDbConnection 读取的数据序列化为字符串吗?如果可以,该怎么做?
/// <summary>
/// Handles a request carrying a "path" (Excel workbook file) and a "tableNames"
/// (worksheet name) parameter: loads the worksheet into a <see cref="DataTable"/>
/// through the ACE OLE DB provider, serializes the table with Json.NET and writes
/// the JSON to the response.
/// </summary>
/// <param name="context">The current HTTP context; supplies the request parameters and the response.</param>
/// <exception cref="ArgumentException">
/// The "path" parameter is missing, or "tableNames" contains a ']' character.
/// </exception>
public void ProcessRequest(HttpContext context)
{
    string path = context.Request["path"];
    string tableNames = context.Request["tableNames"];

    if (string.IsNullOrEmpty(path))
    {
        throw new ArgumentException("The 'path' request parameter is required.");
    }

    // The sheet name is spliced into "[...]" below; a ']' would terminate the
    // bracket-quoted identifier and let the caller alter the query text.
    if (tableNames != null && tableNames.IndexOf(']') >= 0)
    {
        throw new ArgumentException("The 'tableNames' request parameter must not contain ']'.");
    }

    // Ordinal, case-insensitive match so that ".XLS" / ".XLSX" are recognised too.
    // Any other extension leaves the connection string empty, as before.
    string connectionString = string.Empty;
    if (path.EndsWith(".xls", StringComparison.OrdinalIgnoreCase))
    {
        connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0; Data Source={0}; Extended Properties=""Excel 8.0;HDR=YES;IMEX=1""", path);
    }
    else if (path.EndsWith(".xlsx", StringComparison.OrdinalIgnoreCase))
    {
        connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0; Data Source={0}; Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1""", path);
    }

    DbProviderFactory factory = DbProviderFactories.GetFactory("System.Data.OleDb");

    // Every disposable is scoped by a using block so the connection (and the
    // file handle behind it) is released even when Fill or serialization throws.
    using (OleDbConnection conn = new OleDbConnection(connectionString))
    using (DbCommand selectCommand = factory.CreateCommand())
    using (DbDataAdapter adapter = factory.CreateDataAdapter())
    using (DataTable tmp = new DataTable())
    {
        conn.Open();

        selectCommand.CommandText = String.Format("SELECT * FROM [{0}]", tableNames);
        selectCommand.Connection = conn;
        adapter.SelectCommand = selectCommand;
        adapter.Fill(tmp);

        string tabdata = JsonConvert.SerializeObject(tmp);
        context.Response.Write(tabdata);
    }
}
首先,您不应再序列化为中间 string,而应使用以下简单方法直接序列化到 HttpResponse.OutputStream:
string
HttpResponse.OutputStream
/// <summary>
/// Helpers that serialize an object as JSON directly into a writer, a stream or an
/// HTTP response using a Json.NET <see cref="JsonSerializer"/>, instead of first
/// producing the JSON as a string.
/// </summary>
public static class JsonExtensions
{
    /// <summary>
    /// Serializes <paramref name="value"/> to <paramref name="writer"/>.
    /// The writer is neither flushed nor disposed here.
    /// </summary>
    /// <param name="value">The object to serialize.</param>
    /// <param name="writer">The destination text writer.</param>
    /// <param name="settings">Optional serializer settings, passed to <c>JsonSerializer.CreateDefault</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="writer"/> is null.</exception>
    public static void SerializeToStream(object value, TextWriter writer, JsonSerializerSettings settings = null)
    {
        if (writer == null)
        {
            throw new ArgumentNullException("writer");
        }

        JsonSerializer.CreateDefault(settings).Serialize(writer, value);
    }

    /// <summary>
    /// Serializes <paramref name="value"/> to <paramref name="stream"/> through a
    /// <see cref="StreamWriter"/>. Disposing that writer also closes the stream.
    /// </summary>
    /// <param name="value">The object to serialize.</param>
    /// <param name="stream">The destination stream.</param>
    /// <param name="settings">Optional serializer settings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public static void SerializeToStream(object value, Stream stream, JsonSerializerSettings settings = null)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        using (StreamWriter streamWriter = new StreamWriter(stream))
        {
            SerializeToStream(value, (TextWriter)streamWriter, settings);
        }
    }

    /// <summary>
    /// Serializes <paramref name="value"/> to the output stream of <paramref name="response"/>.
    /// </summary>
    /// <param name="value">The object to serialize.</param>
    /// <param name="response">The HTTP response whose <c>OutputStream</c> receives the JSON.</param>
    /// <param name="settings">Optional serializer settings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="response"/> is null.</exception>
    public static void SerializeToStream(object value, System.Web.HttpResponse response, JsonSerializerSettings settings = null)
    {
        if (response == null)
        {
            throw new ArgumentNullException("response");
        }

        Stream output = response.OutputStream;
        SerializeToStream(value, output, settings);
    }
}
由于大字符串的底层 char 数组需要一大块连续内存,因此这里是您最先耗尽内存的地方。另请参阅 Json.NET 的性能提示:
char
为了最大程度地减少内存使用和分配的对象数量,Json.NET支持直接对流进行序列化和反序列化。在处理大小大于85kb的JSON文档时,一次读取或写入JSON而不是将整个JSON字符串加载到内存中尤其重要,这样可以避免JSON字符串出现在大对象堆中。
接下来,请确保将所有可释放(IDisposable)对象包装在 using 语句中,如下所示。
using
这也许就能解决您的问题;如果仍未解决,您可以使用以下 JsonConverter 将 IDataReader 直接序列化为 JSON:
IDataReader
JsonConverter
/// <summary>
/// Write-only Json.NET converter that emits an <see cref="IDataReader"/> as a JSON
/// array containing one object per record, keyed by column name.
/// </summary>
public class DataReaderConverter : JsonConverter
{
    /// <summary>Always false: this converter only writes JSON.</summary>
    public override bool CanRead
    {
        get { return false; }
    }

    /// <summary>Returns true for any type assignable to <see cref="IDataReader"/>.</summary>
    public override bool CanConvert(Type objectType)
    {
        return typeof(IDataReader).IsAssignableFrom(objectType);
    }

    /// <summary>Not supported.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Advances the reader to its end, writing each record as a JSON object inside
    /// a single enclosing JSON array.
    /// </summary>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        var dataReader = (IDataReader)value;

        writer.WriteStartArray();
        while (dataReader.Read())
        {
            WriteRecord(writer, dataReader, serializer);
        }
        writer.WriteEndArray();
    }

    // Writes the current record as { "<column name>": <value>, ... }.
    private static void WriteRecord(JsonWriter writer, IDataRecord record, JsonSerializer serializer)
    {
        writer.WriteStartObject();
        for (int column = 0; column < record.FieldCount; column++)
        {
            writer.WritePropertyName(record.GetName(column));

            if (record.IsDBNull(column))
            {
                // Database nulls become JSON null rather than being handed to the serializer.
                writer.WriteNull();
            }
            else
            {
                serializer.Serialize(writer, record[column]);
            }
        }
        writer.WriteEndObject();
    }
}
然后序列化为流,如下所示:
/// <summary>
/// Streams the rows of an Excel worksheet as JSON by reading them with an OLE DB
/// data reader and serializing that reader with <c>DataReaderConverter</c>.
/// </summary>
public static class ExcelExtensions
{
    /// <summary>
    /// Builds the ACE OLE DB connection string for a workbook, chosen by file extension.
    /// </summary>
    /// <param name="path">Path of the workbook file.</param>
    /// <returns>
    /// The connection string for ".xls" or ".xlsx" files (matched ignoring case);
    /// an empty string for any other extension.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
    private static string GetExcelConnectionString(string path)
    {
        if (path == null)
        {
            throw new ArgumentNullException("path");
        }

        // Ordinal, case-insensitive comparison: file extensions are not linguistic
        // text, and ".XLSX" must be treated the same as ".xlsx".
        string connectionString = string.Empty;
        if (path.EndsWith(".xls", StringComparison.OrdinalIgnoreCase))
        {
            connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0; Data Source={0}; Extended Properties=""Excel 8.0;HDR=YES;IMEX=1""", path);
        }
        else if (path.EndsWith(".xlsx", StringComparison.OrdinalIgnoreCase))
        {
            connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0; Data Source={0}; Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1""", path);
        }
        return connectionString;
    }

    /// <summary>
    /// Serializes a worksheet to a JSON string. The whole document is held in memory.
    /// </summary>
    /// <param name="path">Path of the workbook file.</param>
    /// <param name="workSheetName">Name of the worksheet to select from.</param>
    /// <param name="settings">Optional serializer settings.</param>
    /// <returns>The JSON text.</returns>
    public static string SerializeJsonToString(string path, string workSheetName, JsonSerializerSettings settings = null)
    {
        using (var writer = new StringWriter())
        {
            SerializeJsonToStream(path, workSheetName, writer, settings);
            return writer.ToString();
        }
    }

    /// <summary>
    /// Serializes a worksheet as JSON into <paramref name="stream"/>. The stream is
    /// closed on return because the <see cref="StreamWriter"/> wrapping it is disposed.
    /// </summary>
    /// <param name="path">Path of the workbook file.</param>
    /// <param name="workSheetName">Name of the worksheet to select from.</param>
    /// <param name="stream">The destination stream.</param>
    /// <param name="settings">Optional serializer settings.</param>
    public static void SerializeJsonToStream(string path, string workSheetName, Stream stream, JsonSerializerSettings settings = null)
    {
        using (var writer = new StreamWriter(stream))
        {
            SerializeJsonToStream(path, workSheetName, writer, settings);
        }
    }

    /// <summary>
    /// Serializes a worksheet as JSON into <paramref name="writer"/>, one record at a
    /// time, without loading the sheet into a DataTable.
    /// </summary>
    /// <param name="path">Path of the workbook file.</param>
    /// <param name="workSheetName">Name of the worksheet to select from.</param>
    /// <param name="writer">The destination text writer.</param>
    /// <param name="settings">
    /// Optional serializer settings. A <c>DataReaderConverter</c> is added to its
    /// converter list for the duration of the call and removed afterwards.
    /// </param>
    /// <exception cref="ArgumentException"><paramref name="workSheetName"/> contains ']'.</exception>
    public static void SerializeJsonToStream(string path, string workSheetName, TextWriter writer, JsonSerializerSettings settings = null)
    {
        // The name is spliced into "[...]" in the SELECT below; a ']' would end the
        // bracket-quoted identifier and let the caller change the query text.
        if (workSheetName != null && workSheetName.IndexOf(']') >= 0)
        {
            throw new ArgumentException("Worksheet name must not contain ']'.", "workSheetName");
        }

        settings = settings ?? new JsonSerializerSettings();
        var converter = new DataReaderConverter();
        settings.Converters.Add(converter);
        try
        {
            string connectionString = GetExcelConnectionString(path);
            DbProviderFactory factory = DbProviderFactories.GetFactory("System.Data.OleDb");
            using (OleDbConnection conn = new OleDbConnection(connectionString))
            {
                conn.Open();
                using (DbCommand selectCommand = factory.CreateCommand())
                {
                    selectCommand.CommandText = String.Format("SELECT * FROM [{0}]", workSheetName);
                    selectCommand.Connection = conn;
                    using (var reader = selectCommand.ExecuteReader())
                    {
                        JsonExtensions.SerializeToStream(reader, writer, settings);
                    }
                }
            }
        }
        finally
        {
            // Undo the temporary registration so caller-supplied settings are left as received.
            settings.Converters.Remove(converter);
        }
    }
}
注意:以上代码仅经过少量测试。在投入生产之前,请务必对照您现有的方法对它进行单元测试!转换器代码的编写参考了“DataReader 的 JSON 序列化”一文。
更新资料
我的转换器输出的 JSON 与 Json.NET 的 DataTableConverter 结构相同。因此,您可以使用 Json.NET 将其自动反序列化为 DataTable。如果您希望使用更紧凑的格式,则可以定义自己的格式,例如:
DataTableConverter
DataTable
{ "columns": [ "Name 1", "Name 2" ], "rows": [ [ "value 11", "value 12" ], [ "value 21", "value 22" ] ] }
然后创建以下转换器:
/// <summary>
/// Write-only Json.NET converter that emits an <see cref="IDataReader"/> in a compact
/// form: an object with a "columns" array of field names and a "rows" array holding
/// one value array per record.
/// </summary>
public class DataReaderArrayConverter : JsonConverter
{
    const string columnsName = "columns";
    const string rowsName = "rows";

    /// <summary>Returns true for any type assignable to <see cref="IDataReader"/>.</summary>
    public override bool CanConvert(Type objectType)
    {
        return typeof(IDataReader).IsAssignableFrom(objectType);
    }

    /// <summary>Always false: this converter only writes JSON.</summary>
    public override bool CanRead
    {
        get { return false; }
    }

    /// <summary>Not supported.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        throw new NotImplementedException();
    }

    // Collects the reader's field names in ordinal order.
    static string[] GetFieldNames(IDataReader reader)
    {
        var fieldNames = new string[reader.FieldCount];
        for (int i = 0; i < reader.FieldCount; i++)
        {
            fieldNames[i] = reader.GetName(i);
        }
        return fieldNames;
    }

    // Throws if the current record's field count or names differ from the header already written.
    static void ValidateFieldNames(IDataReader reader, string[] fieldNames)
    {
        if (reader.FieldCount != fieldNames.Length)
        {
            throw new InvalidOperationException("Unequal record lengths");
        }
        for (int i = 0; i < reader.FieldCount; i++)
        {
            if (fieldNames[i] != reader.GetName(i))
            {
                throw new InvalidOperationException(string.Format("Field names at index {0} differ: \"{1}\" vs \"{2}\"", i, fieldNames[i], reader.GetName(i)));
            }
        }
    }

    // Writes the "columns" property and opens the "rows" array.
    static void WriteHeader(JsonWriter writer, JsonSerializer serializer, string[] fieldNames)
    {
        writer.WritePropertyName(columnsName);
        serializer.Serialize(writer, fieldNames);
        writer.WritePropertyName(rowsName);
        writer.WriteStartArray();
    }

    /// <summary>
    /// Advances the reader to its end and writes
    /// <c>{ "columns": [...], "rows": [[...], ...] }</c>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// A record's field count or field names differ from those of the first record.
    /// </exception>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        var reader = (IDataReader)value;
        writer.WriteStartObject();

        string[] fieldNames = null;
        while (reader.Read())
        {
            if (fieldNames == null)
            {
                // First record: capture the names and emit the header.
                fieldNames = GetFieldNames(reader);
                WriteHeader(writer, serializer, fieldNames);
            }
            else
            {
                ValidateFieldNames(reader, fieldNames);
            }

            writer.WriteStartArray();
            for (int i = 0; i < reader.FieldCount; i++)
            {
                if (reader.IsDBNull(i))
                {
                    writer.WriteNull();
                }
                else
                {
                    serializer.Serialize(writer, reader[i]);
                }
            }
            writer.WriteEndArray();
        }

        if (fieldNames == null)
        {
            // No records were read. Still emit the header and an empty "rows" array so
            // that consumers always receive the same object shape instead of "{}".
            WriteHeader(writer, serializer, GetFieldNames(reader));
        }

        writer.WriteEndArray();
        writer.WriteEndObject();
    }
}
当然,您需要在客户端上创建自己的反序列化转换器。
或者,您可以考虑压缩响应。我从未尝试过,但请参阅HttpWebRequest和GZip Http响应和ASP.NET GZip编码警告。