ExcelOperations Library - ExcelToDataSet, DataSetToExcel

ExcelOperations Library - ExcelToDataSet, DataSetToExcel

This function library does two things mainly.

  • Takes an Excel binary (.xlsx) and exports it to a DataSet.
  • Takes a DataSet and exports it to an Excel binary. (.xlsx)

It also includes a helper function that will open an excel file on the server, and run it through the ExcelToDataSet function for you.

ExcelToDataSet will export all sheets in an excel file (.xlsx) to a DataSet. Each sheet get’s it’s own table.

DataSetToExcel will export all tables in a DataSet to an excel file (.xlsx). Each table get’s its own sheet.

ReadExcelFileReturnDataSet will read an excel file and run it through ExcelToDataSet and return a DataSet.

Library → ExcelOperations.efxj (17.5 KB)

Here are the guts →

References →

  • DocumentFormat.OpenXml.dll
  • Newtonsoft.Json.dll
  • System.IO.Packaging.dll

Function → ExcelToDataSet

Signature →

Name Type
b64Data System.String
hasHeaderRows System.Boolean

Usings →
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using Newtonsoft.Json;

Code →

Func<SpreadsheetDocument, List<string>> GetWorksheetNames = (doc) =>
{
    return doc.WorkbookPart.Workbook.Sheets
        .Elements<Sheet>()
        .Select(s => s.Name?.Value)
        .Where(name => name != null)
        .ToList();
};

Func<Cell, SharedStringTable, object> GetCellValue = (cell, sharedStrings) =>
{
    if (cell == null || cell.CellValue == null)
        return DBNull.Value;

    string value = cell.CellValue.Text;

    if (cell.DataType != null && cell.DataType.HasValue)
    {
        CellValues dataType = cell.DataType.Value;

        if (dataType == CellValues.SharedString)
        {
            int index;
            if (int.TryParse(value, out index) && sharedStrings != null)
                return sharedStrings.ElementAt(index).InnerText;
            return value;
        }
        else if (dataType == CellValues.Boolean)
        {
            return value == "1" ? true : false;
        }
        else
        {
            return value; // inline strings etc.
        }
    }
    else
    {
        // Try parse as int first
        int intVal;
        if (int.TryParse(value, out intVal))
            return intVal;

        // Then try date (Excel stores as OADate)
        double oaDate;
        if (double.TryParse(value, out oaDate) && oaDate > 0 && oaDate < 60000)
        {
            try
            {
                DateTime dt = DateTime.FromOADate(oaDate);
                if (dt.Year > 1900 && dt.Year < 2100)
                    return dt;
            }
            catch { }
        }

        // Fallback to double
        double dbl;
        if (double.TryParse(value, out dbl))
            return dbl;

        return value;
    }
};


Func<SpreadsheetDocument, string, bool, DataTable> ParseExcelFile = (xlDoc, sheet, hasHeader) =>
{
    WorkbookPart workbookPart = xlDoc.WorkbookPart;

    Sheet sheetMeta = workbookPart.Workbook.Sheets.Elements<Sheet>().FirstOrDefault(x => x.Name == sheet);

    if (sheetMeta == null) throw new Exception("Sheet not found");

    WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheetMeta.Id);

    SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();

    SharedStringTable sharedStringTable = workbookPart.SharedStringTablePart != null
        ? workbookPart.SharedStringTablePart.SharedStringTable
        : null;

    List<Row> rows = sheetData.Elements<Row>().ToList();

    if (rows.Count == 0) return null;

    DataTable table = new DataTable(sheet.Replace(" ", ""));

    int startRowIndex = 0;

    // Use typeof(object) to allow mixed data types
    if (hasHeader)
    {
        Row headerRow = rows[0];
        foreach (Cell cell in headerRow.Elements<Cell>())
        {
            string columnName = GetCellValue(cell, sharedStringTable)?.ToString();
            table.Columns.Add(string.IsNullOrEmpty(columnName)
                ? "Column" + (table.Columns.Count + 1)
                : columnName, typeof(object));
        }
        startRowIndex = 1;
    }
    else
    {
        int columnCount = rows.Max(r => r.Elements<Cell>().Count());
        for (int i = 0; i < columnCount; i++)
        {
            table.Columns.Add("Column" + (i + 1), typeof(object));
        }
    }

    for (int i = startRowIndex; i < rows.Count; i++)
    {
        Row row = rows[i];
        List<Cell> cells = row.Elements<Cell>().ToList();
        DataRow dataRow = table.NewRow();

        for (int j = 0; j < table.Columns.Count; j++)
        {
            if (j < cells.Count)
                dataRow[j] = GetCellValue(cells[j], sharedStringTable);
            else
                dataRow[j] = DBNull.Value;
        }

        table.Rows.Add(dataRow);
    }

    return table;
};

#if !DEBUG
Func<string, bool, DataSet> BuildDataSetFromSpreadSheet = (b64, hr) =>
{
    using (var memoryStream = new System.IO.MemoryStream(Convert.FromBase64String(b64)))
    {
        using (var xlDoc = SpreadsheetDocument.Open(memoryStream, false))
        {
            var listSheets = GetWorksheetNames(xlDoc);
          
            DataSet tempDS = new DataSet();
          
            listSheets.ForEach(x =>
            {
                tempDS.Tables.Add(ParseExcelFile(xlDoc, x, hr));
            });  
        
            return tempDS;
            
        }      
    }
};
#else
Func<string, bool, DataSet> BuildDataSetFromSpreadSheet = (b64, hr) => null;
#endif


//Execution begins here:

var ListErrors  = new List<Exception>();
try
{
#if !DEBUG  
    Success = true;
#else
    throw new BLException("Library was left in DEBUG Mode.");
#endif

    ds = BuildDataSetFromSpreadSheet(b64Data, hasHeaderRows); //hasHeaderRows is global for all sheets

}
catch (Exception ex)
{
    Success = false;
    ListErrors.Add(ex);
}
finally
{
    ListErrorsJson = JsonConvert.SerializeObject(ListErrors, Formatting.Indented);
}

Function → DataSetToExcel

Signature →

Name Type
ds System.Data.DataSet
writeHeaderRows System.Boolean

Usings →
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using Newtonsoft.Json;

Code →

Func<object, Cell> CreateCellFromValue = (value) =>
{
    var cell = new Cell();

    if (value == null || value == DBNull.Value)
    {
        cell.DataType = CellValues.String;
        cell.CellValue = new CellValue("");
    }
    else if (value is bool)
    {
        cell.DataType = CellValues.Boolean;
        cell.CellValue = new CellValue((bool)value ? "1" : "0");
    }
    else if (value is int || value is long || value is float || value is double || value is decimal)
    {
        // Leave DataType null to let Excel treat it as number
        cell.CellValue = new CellValue(Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture));
    }
    else
    {
        cell.DataType = CellValues.String;
        var str = value.ToString();
        if (str.StartsWith("'")) // Remove Excel-style forced text apostrophe
            str = str.Substring(1);
        cell.CellValue = new CellValue(str);
    }

    return cell;
};

//Put library in debug mode while coding for the error to go away, take it off before saving
#if !DEBUG      
Func<DataSet, byte[]> ExportToExcelAsBytes = (dataSet) =>
{
    using (var stream = new System.IO.MemoryStream())
    {
        //Create Excel document
        using (SpreadsheetDocument doc = SpreadsheetDocument.Create(stream, SpreadsheetDocumentType.Workbook, true))
        {
            WorkbookPart workbookPart = doc.AddWorkbookPart();
            workbookPart.Workbook     = new Workbook();
            Sheets sheets             = workbookPart.Workbook.AppendChild(new Sheets());

            uint sheetId = 1;

            //Loop through each table in the dataset
            foreach (DataTable table in dataSet.Tables)
            {
                WorksheetPart worksheetPart = workbookPart.AddNewPart<WorksheetPart>();
                SheetData sheetData = new SheetData();
                worksheetPart.Worksheet = new Worksheet(sheetData);

                if(writeHeaderRows)
                {
                    // Header row
                    Row headerRow = new Row();
                    foreach (DataColumn col in table.Columns)
                    {
                        headerRow.AppendChild(CreateCellFromValue(col.ColumnName));
                    }
                    sheetData.AppendChild(headerRow);
                }

                // Data rows
                foreach (DataRow row in table.Rows)
                {
                    Row newRow = new Row();
                    foreach (var item in row.ItemArray)
                    {
                        newRow.AppendChild(CreateCellFromValue(item));
                    }
                    sheetData.AppendChild(newRow);
                }

                string relId = workbookPart.GetIdOfPart(worksheetPart);
                sheets.Append(new Sheet
                {
                    Id = relId,
                    SheetId = sheetId++,
                    Name = table.TableName
                });
            }

            workbookPart.Workbook.Save();
        }

        return stream.ToArray(); //Return byte array
    }
};
#else
Func<DataSet, byte[]> ExportToExcelAsBytes = (dataSet) => null;
#endif

var ListErrors  = new List<Exception>();

try
{
#if !DEBUG  
    Success = true;
#else
    throw new BLException("Library was left in DEBUG Mode.");
#endif

   b64Data = Convert.ToBase64String(ExportToExcelAsBytes(ds));    

}
catch (Exception ex)
{
    Success = false;
    ListErrors.Add(ex);
}
finally
{
    ListErrorsJson = JsonConvert.SerializeObject(ListErrors, Formatting.Indented);
}

Function → ReadExcelFileReturnDataSet

Signature →

Name Type
inServerFolder System.Int32
inFileName System.String
hasHeader System.Boolean

Usings →
none

Code →


var sb = (ISandbox)Sandbox;

var serverFolder = (ServerFolder)inServerFolder;

var filePath = new FilePath(serverFolder, inFileName);

var excelFileBytes = sb.IO.File.ReadAllBytes(filePath);

var excelFileAsB64 = Convert.ToBase64String(excelFileBytes);

var processExcel = ThisLib.ExcelToDataSet(excelFileAsB64, hasHeader);

if(processExcel.Success)
{
    ds = processExcel.ds;
}

Example here →

11 Likes

@klincecum, have you used these libraries since the 2025.1.8 update? I installed the function library today and I’m getting the following error:

The type ‘Package’ is defined in an assembly that is not referenced. You must add a reference to assembly WindowsBase, Version=3.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35.

It appears it’s looking for a reference to WindowsBase.dll, but that isn’t available as an assembly reference (likely due to the un-Windowing of the AKS environment).

Edit: Apparently my search skills need work. It looks like you guys covered this in another thread - the code works despite the warning.