Free cookie consent management tool by TermsFeed Policy Generator

source: branches/PersistenceOverhaul/HeuristicLab.ExtLibs/HeuristicLab.EPPlus/4.0.3/EPPlus-4.0.3/Style/ExcelRichTextHtmlUtility.cs @ 13325

Last change on this file since 13325 was 12074, checked in by sraggl, 10 years ago

#2341: Added EPPlus-4.0.3 to ExtLibs

File size: 7.4 KB
Line 
1/*******************************************************************************
2 * You may amend and distribute as you like, but don't remove this header!
3 *
4 * EPPlus provides server-side generation of Excel 2007/2010 spreadsheets.
5 * See http://www.codeplex.com/EPPlus for details.
6 *
7 * Copyright (C) 2011  Jan Källman
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
17 * See the GNU Lesser General Public License for more details.
18 *
19 * The GNU Lesser General Public License can be viewed at http://www.opensource.org/licenses/lgpl-license.php
20 * If you are unfamiliar with this license or have questions about it, here is an FAQ: http://www.gnu.org/licenses/gpl-faq.html
21 *
22 * All code and executables are provided "as is" with no warranty either express or implied.
23 * The author accepts no liability for any damage or loss of business that this product may cause.
24 *
25 * Code change notes:
26 *
27 * Author       Change            Date
28 * ******************************************************************************
29 * Richard Tallent    Initial Release       2012-08-13
30 *******************************************************************************/
31using System;
32using System.Collections.Generic;
33using System.Text;
34using System.Text.RegularExpressions;
35
36namespace OfficeOpenXml.Style
37{
public class ExcelRichTextHtmlUtility
{
  // Matches a BR tag, with or without attributes or a self-closing slash.
  // CultureInvariant keeps the case-insensitive match independent of the current
  // thread culture (e.g. the Turkish dotted/dotless "i" casing rules).
  private static readonly Regex LineBreakTag = new Regex(
    @"<br[^>]*>",
    RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

  // Matches one start or end tag followed by the text up to the NEXT tag or end of string.
  // Group 1 is the tag name including a leading "/" for end tags; group 2 is the text.
  private static readonly Regex TagAndFollowingText = new Regex(
    @"<(/?[a-z]+)[^<>]*>([\s\S]*?)(?=</?[a-z]+[^<>]*>|$)",
    RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

  /// <summary>
  /// Provides basic HTML support by converting well-behaved HTML into appropriate RichText blocks.
  /// HTML support is limited, and does not include font colors, sizes, or typefaces at this time,
  /// and also does not support CSS style attributes. It does support line breaks using the BR tag.
  ///
  /// This routine parses the HTML into RegEx pairings of an HTML tag and the text until the NEXT
  /// tag (if any). The tag is parsed to determine the setting change to be applied to the last set
  /// of settings, and if the text is not blank, a new block is added to rich text.
  ///
  /// Recognized tags are b/strong, i/em, u, s/strike, sup and sub (and their end tags); any other
  /// tag causes no style change. Tag names are matched case-insensitively and independently of
  /// the current culture.
  /// </summary>
  /// <param name="range">The range whose value is reset and then filled from the HTML.</param>
  /// <param name="html">The HTML to parse into RichText. Null or empty yields a blank, non-rich-text value.</param>
  /// <param name="defaultFontName">Font name assigned to the first rich text block.</param>
  /// <param name="defaultFontSize">Font size assigned to the first rich text block.</param>
  public static void SetRichTextFromHtml(ExcelRange range, string html, string defaultFontName, short defaultFontSize)
  {
    // Reset the cell value, just in case there is an existing RichText value.
    range.Value = "";

    // Nothing to parse: leave the cell blank and not flagged as rich text.
    if (String.IsNullOrEmpty(html))
    {
      range.IsRichText = false;
      return;
    }

    // Change all BR tags to line breaks. http://epplus.codeplex.com/discussions/238692/
    // Cells with line breaks aren't necessarily considered rich text, so this is performed
    // before parsing the HTML tags.
    html = LineBreakTag.Replace(html, "\r\n");

    // The rich text block currently being filled; null until the first tag is seen.
    ExcelRichText current = null;

    // Walk all pairs of tags and the text that follows them. This loop only
    // executes if there is at least one start or end tag.
    foreach (Match m in TagAndFollowingText.Matches(html))
    {
      if (current == null)
      {
        // On the very first match, set up the initial rich text object with
        // the defaults for the text BEFORE the match (may be 0-length).
        range.IsRichText = true;
        current = range.RichText.Add(CleanText(html.Substring(0, m.Index)));
        current.Size = defaultFontSize;
        current.FontName = defaultFontName;
      }

      // Get the tag and the block of text until the NEXT tag or end of string. HTML entities
      // are decoded so they reach RichText as normal characters (non-breaking spaces are
      // turned into normal spaces by CleanText).
      string tag = m.Groups[1].Value;
      string text = CleanText(m.Groups[2].Value);

      if (current.Text == "")
      {
        // The most recent rich text block wasn't *actually* used last time around, so update
        // the text and keep it as the "current" block. This happens with the first block if
        // the HTML starts with a tag, and may happen later if tags come one right after the other.
        current.Text = text;
      }
      else
      {
        // The current rich text block has some text, so create a new one. Per the original
        // author's note, RichText.Add() carries over the settings from the previous block,
        // other than vertical alignment.
        current = range.RichText.Add(text);
      }

      // Override the settings based on the current tag, keep all other settings.
      SetStyleFromTag(tag, current);
    }

    if (current == null)
    {
      // No HTML tags were found, so treat this as a normal text value.
      range.IsRichText = false;
      range.Value = CleanText(html);
    }
    else if (String.IsNullOrEmpty(current.Text))
    {
      // Rich text was found, but the last node contains no text, so remove it. This can happen if,
      // say, the end of the string is an end tag or unsupported tag (common).
      range.RichText.Remove(current);

      // Failsafe -- the HTML may be just tags, no text, in which case there may be no rich text
      // blocks left. If that is the case, turn off rich text and treat this like a blank cell value.
      if (range.RichText.Count == 0)
      {
        range.IsRichText = false;
        range.Value = "";
      }
    }
  }

  /// <summary>
  /// Applies the style change implied by a single HTML tag to a rich text block.
  /// Start tags switch a setting on, end tags (leading "/") switch it off; unknown tags are ignored.
  /// </summary>
  /// <param name="tag">Tag name without angle brackets, e.g. "b" or "/strong"; any letter case.</param>
  /// <param name="settings">The rich text block whose style is modified.</param>
  private static void SetStyleFromTag(string tag, ExcelRichText settings)
  {
    // ToLowerInvariant, not ToLower: under Turkish/Azeri cultures "I".ToLower() is a dotless
    // "ı", which would make <I> and <STRIKE> fall through to the default branch.
    switch (tag.ToLowerInvariant())
    {
      case "b":
      case "strong":
        settings.Bold = true;
        break;
      case "/b":
      case "/strong":
        settings.Bold = false;
        break;
      case "i":
      case "em":
        settings.Italic = true;
        break;
      case "/i":
      case "/em":
        settings.Italic = false;
        break;
      case "u":
        settings.UnderLine = true;
        break;
      case "/u":
        settings.UnderLine = false;
        break;
      case "s":
      case "strike":
        settings.Strike = true;
        break;
      case "/s":
      case "/strike":
        settings.Strike = false;
        break;
      case "sup":
        settings.VerticalAlign = ExcelVerticalAlignmentFont.Superscript;
        break;
      case "sub":
        settings.VerticalAlign = ExcelVerticalAlignmentFont.Subscript;
        break;
      case "/sup":
      case "/sub":
        settings.VerticalAlign = ExcelVerticalAlignmentFont.None;
        break;
      default:
        // Unsupported HTML, no style change.
        break;
    }
  }

  /// <summary>
  /// Decodes HTML entities in a text fragment and replaces non-breaking spaces with normal spaces.
  /// </summary>
  /// <param name="s">Raw text taken from between HTML tags.</param>
  /// <returns>The decoded text; a null or empty input is returned unchanged.</returns>
  private static string CleanText(string s)
  {
    // Nothing to decode, and avoids calling Replace on a null reference.
    if (String.IsNullOrEmpty(s))
    {
      return s;
    }

    // Convert HTML entities (named or numbered) into actual Unicode characters.
    string decoded = System.Web.HttpUtility.HtmlDecode(s);

    // Replace non-breaking spaces with normal spaces (the original author notes they break Excel).
    return decoded.Replace("\u00A0", " ");
  }

}
200}
Note: See TracBrowser for help on using the repository browser.