C#写的一段解析 CSV 文件的代码

类别:.NET开发 点击:0 评论:0 推荐:

using System;

/**
 * The Comma Separated Value (CSV) File Format: http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
 * 描述:解析 CSV 格式的文件。
 * 由这里 http://blog.csdn.net/emu/archive/2003/03/01/16338.aspx 的Java代码改写而来(增加了行处理)
 * 日期: 2004-10-22 14:55
 */
namespace Mitumori {
 /// <summary>
 /// CSVUtil 用来处理CSV格式的文件内容成一二维数组。
 /// </summary>
 public class CSVUtil {
  private CSVUtil() {
  }

  /// <summary>
  /// 分割 CVS 文件内容为一个二维数组。
  /// </summary>
  /// <param name="src">CVS 文件内容字符串</param>
  /// <returns>二维数组。String[line count][column count]</returns>
  public static String[][] SplitCSV(String src) {
   // 如果输入为空,返回 0 长度字符串数组
   if (src==null || src.Length == 0) return new String[0][]{};
   String st="";
   System.Collections.ArrayList lines = new System.Collections.ArrayList(); // 行集合。其元素为行
   System.Collections.ArrayList cells = new System.Collections.ArrayList(); // 单元格集合。其元素为一个单元格
   bool beginWithQuote = false;
   int maxColumns = 0;
   // 遍历字符串的字符
   for (int i=0;i<src.Length;i++){
    char ch = src[i];

    #region CR 或者 LF
    //A record separator may consist of a line feed (ASCII/LF=0x0A),
    //or a carriage return and line feed pair (ASCII/CRLF=0x0D 0x0A).
    // 这里我不明白CR为什么不作为separator呢,在Mac OS上好像是用CR的吧。
    // 这里我“容错”一下,CRLF、LFCR、CR、LF都作为separator
    if (ch == '\r') {
     #region CR
     if (beginWithQuote) {
      st += ch;
     }
     else {
      if(i+1 < src.Length && src[i+1] == '\n') { // 如果紧接的是LF,那么直接把LF吃掉
       i++;
      }
      
      //line = new String[cells.Count];
      //System.Array.Copy (cells.ToArray(typeof(String)), line, line.Length);
      //lines.Add(line); // 把上一行放到行集合中去
      
      cells.Add(st);
      st = "";
      beginWithQuote = false;

      maxColumns = (cells.Count > maxColumns ? cells.Count : maxColumns);
      lines.Add(cells);
      st = "";
      cells = new System.Collections.ArrayList();
     }
     #endregion CR
    }
    else if (ch == '\n') {
     #region LF
     if (beginWithQuote) {
      st += ch;
     }
     else {
      if(i+1 < src.Length && src[i+1] == '\r') { // 如果紧接的是LF,那么直接把LF吃掉
       i++;
      }
      
      //line = new String[cells.Count];
      //System.Array.Copy (cells.ToArray(typeof(String)), line, line.Length);
      //lines.Add(line); // 把上一行放到行集合中去
      
      cells.Add(st);
      st = "";
      beginWithQuote = false;

      maxColumns = (cells.Count > maxColumns ? cells.Count : maxColumns);
      lines.Add(cells);
      st = "";
      cells = new System.Collections.ArrayList();
     }
     #endregion LF
    }
    #endregion CR 或者 LF
    else if (ch == '\"'){ // 双引号
     #region 双引号
     if (beginWithQuote){
      i++;
      if (i>=src.Length){
       cells.Add(st);
       st="";
       beginWithQuote=false;
      }
      else{
       ch=src[i];
       if (ch == '\"'){
        st += ch;
       }
       else if (ch == ','){
        cells.Add(st);
        st="";
        beginWithQuote = false;
       }
       else{
        throw new Exception("Single double-quote char mustn't exist in filed "+(cells.Count+1)+" while it is begined with quote\nchar at:"+i);
       }
      }
     }
     else if (st.Length==0){
      beginWithQuote = true;
     }
     else{
      throw new Exception("Quote cannot exist in a filed which doesn't begin with quote!\nfield:"+(cells.Count+1));
     }
     #endregion 双引号
    }
    else if (ch==','){
     #region 逗号
     if (beginWithQuote){
      st += ch;
     }
     else{
      cells.Add(st);
      st = "";
      beginWithQuote = false;
     }
     #endregion 逗号
    }
    else{
     #region 其它字符
     st += ch;
     #endregion 其它字符
    }

   }
   if (st.Length != 0){
    if (beginWithQuote){
     throw new Exception("last field is begin with but not end with double quote");
    }
    else{
     cells.Add(st);
     maxColumns = (cells.Count > maxColumns ? cells.Count : maxColumns);
     lines.Add(cells);
    }
   }

   String[][] ret = new String[lines.Count][];
   for (int i = 0; i < ret.Length; i++) {
    cells = (System.Collections.ArrayList) lines[i];
    ret[i] = new String[maxColumns];
    for (int j = 0; j < maxColumns; j++) {
     ret[i][j] = cells[j].ToString();
    }
   }
   //System.Array.Copy(lines.ToArray(typeof(String[])), ret, ret.Length);
   return ret;
  }

  public static void aMain(String[] args){
   String src1=  "\"fh,zg\",sdf,\"asfs,\",\",dsdf\",\"aadf\"\"\",\"\"\"hdfg\",\"fgh\"\"dgnh\",hgfg'dfh,\"asdfa\"\"\"\"\",\"\"\"\"\"fgjhg\",\"gfhg\"\"\"\"hb\"\n";
   try {
    String[][] Ret = SplitCSV(src1);
    for (int i=0;i<Ret.Length;i++){
     for (int j = 0; j < Ret[i].Length; i++) {
      System.Console.WriteLine(Ret[i][j]);
     }
     System.Console.WriteLine();
    }
   }
   catch(Exception e) {
    System.Console.WriteLine(e.StackTrace);
   }
  }


 }
}

本文地址:http://com.8s8s.com/it/it43333.htm