由於有大量Transaction資料(超過十萬行)在資料庫, 同時想做多個不同的統計, 為免加重資料庫的負荷, 唯有一次過query所需資料, 再在client那邊進行統計運算
Google一下後, 發現.net中的DataTable竟然沒有GROUP BY的功能
找了一些用foreach loop + Rows.Add 來自行製作一個新Table的教學
類似:
private DataTable GroupBy(DataTable myTable)
{
DataTable dt = new DataTable();
dt.Columns.Add("ColumnName");
foreach (DataRow row in myTable.Rows)
{
string s = row["ColumnName"].ToString();
DataRow[] drx = dt.Select("ColumnName= '" + s + "'");
if (drx.Length == 0)
{
dt.Rows.Add(new object[] { s });
}
}
return dt;
}
可是這麼一來就每一項統計都要自行製作一套Group by的運作, 延續性太低, 於是我在
arstechnica forum發現了一個有用的function, 就拿了來改改, 比較dynamic一點點
public struct StatGroupByInfo
{
public string table_Filter;
public string[] column_FilterExpr;
public string[] column_names;
public string[] column_functions;
public Type[] column_types;
public string[] group_by; //also sort by the first group_by column
public StatGroupByInfo(string table_filter, string[] col_filters, string[] col_names, string[] col_functions, Type[] col_type, string[] group)
{
table_Filter = table_filter;
column_FilterExpr = col_filters;
column_names = col_names;
column_functions = col_functions;
column_types = col_type;
group_by = group;
}
}
static class DataSetFunctions
{
public static DataTable GroupBy(DataTable table, StatGroupByInfo info)
{
return GroupBy(table, info.table_Filter, info.column_FilterExpr, info.column_names, info.column_functions, info.column_types, info.group_by);
}
public static DataTable GroupBy(DataTable table, string table_filter, string[] filterExpr, string[] aggregate_columns, string[] aggregate_functions, Type[] column_types, string[] group_by_columns)
{
//Filter the table
DataView dvView = new DataView(table, table_filter, group_by_columns[0] + " ASC", DataViewRowState.CurrentRows);
//Group the table by column
DataTable dtGrouped = dvView.ToTable(true, group_by_columns);
//Implement aggregate functions
for (int i = 0; i < aggregate_columns.Length; i++)
{
dtGrouped.Columns.Add(aggregate_columns[i], column_types[i]);
}
foreach (DataRow row in dtGrouped.Rows)
{
List<string> filter_parts = new List<string>();
for (int i = 0; i < group_by_columns.Length; i++)
{
filter_parts.Add(String.Format("[{0}] = '{1}'", group_by_columns[i], row[group_by_columns[i]].ToString().Replace("'", "''")));
}
string filter = String.Join(" AND ", filter_parts.ToArray());
for (int i = 0; i < aggregate_columns.Length; i++)
{
string columnFilter = filter;
if (!string.IsNullOrEmpty(filterExpr[i].Trim()))
{
columnFilter = columnFilter + " AND " + filterExpr[i];
if (!string.IsNullOrEmpty(table_filter.Trim()))
{
columnFilter = table_filter + " AND " + columnFilter;
}
}
row[aggregate_columns[i]] = table.Compute(aggregate_functions[i], columnFilter);
if (row[aggregate_columns[i]] == DBNull.Value && column_types[i] == typeof(uint))
{
row[aggregate_columns[i]] = 0;
}
}
}
return dtGrouped;
}
}
用法:
StatGroupByInfo groupByInfo = new StatGroupByInfo( "", new string[] { "[action] = 1 or [action] = 2", "[action] = 3 or [action] = 4", "[action] = 5 or [action] = 6"}, new string[]{ "itemmallPoint", "nonItemmallPoint", "totalPoint" }, new string[]{ "sum(usePoint)", "sum(usePoint)", "sum(usePoint)" }, new Type[]{ typeof(uint), typeof(uint), typeof(uint) }, new string[]{ "date" } ),
DataTable _dt = DataSetFunctions.GroupBy(DbRes.Transaction, groupByInfo);
解說:
要customize 一個group by 的operation, 主要是用StatGroupByInfo裡的variables
public string table_Filter: 就是原來那個table的filter, 先濾掉不在統計範圍內的entry
public string[] column_FilterExpr: 每個column不同的filter
public string[] column_names: column的名稱, 可以自訂
public string[] column_functions: 每個column的aggregate function, 如 count, max, avg 等, 可參考
MSDN
public Type[] column_types: 每個column的type, 要跟上面aggregate的return type對應
public string[] group_by: 要進行 GROUP BY 的columns
[後記]
後來發現有很多人寫的DataHelperClass, 用來manage dataset的data:
geekswithblogs一堆Helper Class 中的
DataSetHelper Class that I am using
jiezhi@cnblogs
MSDN 上的一些文章