Context Navigation

source: trunk/sources/HeuristicLab.ExtLibs/HeuristicLab.EPPlus/4.0.3/EPPlus-4.0.3/Style/ExcelRichTextHtmlUtility.cs @ 12329

Visit:

Last change on this file since 12329 was 12074, checked in by sraggl, 10 years ago
#2341: Added EPPlus-4.0.3 to ExtLibs
File size: 7.4 KB

Rev	Line
[12074]	1	/*******************************************************************************
	2	* You may amend and distribute as you like, but don't remove this header!
	3	*
	4	* EPPlus provides server-side generation of Excel 2007/2010 spreadsheets.
	5	* See http://www.codeplex.com/EPPlus for details.
	6	*
	7	* Copyright (C) 2011 Jan Källman
	8	*
	9	* This library is free software; you can redistribute it and/or
	10	* modify it under the terms of the GNU Lesser General Public
	11	* License as published by the Free Software Foundation; either
	12	* version 2.1 of the License, or (at your option) any later version.
	13
	14	* This library is distributed in the hope that it will be useful,
	15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
	17	* See the GNU Lesser General Public License for more details.
	18	*
	19	* The GNU Lesser General Public License can be viewed at http://www.opensource.org/licenses/lgpl-license.php
	20	* If you are unfamiliar with this license or have questions about it, here is an FAQ: http://www.gnu.org/licenses/gpl-faq.html
	21	*
	22	* All code and executables are provided "as is" with no warranty either express or implied.
	23	* The author accepts no liability for any damage or loss of business that this product may cause.
	24	*
	25	* Code change notes:
	26	*
	27	* Author Change Date
	28	* ******************************************************************************
	29	* Richard Tallent Initial Release 2012-08-13
	30	*******************************************************************************/
	31	using System;
	32	using System.Collections.Generic;
	33	using System.Text;
	34	using System.Text.RegularExpressions;
	35
	36	namespace OfficeOpenXml.Style
	37	{
	/// <summary>
	/// Helper that converts simple, well-behaved HTML into RichText blocks on an <see cref="ExcelRange"/>.
	/// </summary>
	public class ExcelRichTextHtmlUtility
	{
		// Both patterns are matched culture-invariantly so that tag names such as <I> are still
		// recognized when the current culture has special casing rules (e.g. the Turkish dotless i).
		private const RegexOptions TagRegexOptions =
			RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

		// Any BR tag, with or without attributes or a self-closing slash.
		private static readonly Regex LineBreakTagRegex = new Regex(@"<br[^>]*>", TagRegexOptions);

		// Group 1: the tag name (with a leading "/" for end tags).
		// Group 2: the text that follows the tag, lazily matched up to the next tag or the end of the string.
		private static readonly Regex TagAndTextRegex =
			new Regex(@"<(/?[a-z]+)[^<>]*>([\s\S]*?)(?=</?[a-z]+[^<>]*>|$)", TagRegexOptions);

		/// <summary>
		/// Provides basic HTML support by converting well-behaved HTML into appropriate RichText blocks.
		/// HTML support is limited, and does not include font colors, sizes, or typefaces at this time,
		/// and also does not support CSS style attributes. It does support line breaks using the BR tag.
		///
		/// This routine parses the HTML into RegEx pairings of an HTML tag and the text until the NEXT
		/// tag (if any). The tag is parsed to determine the setting change to be applied to the last set
		/// of settings, and if the text is not blank, a new block is added to rich text.
		/// </summary>
		/// <param name="range">The range whose value is reset and then filled from <paramref name="html"/>.</param>
		/// <param name="html">The HTML to parse into RichText. Null or empty leaves the range blank and non-rich.</param>
		/// <param name="defaultFontName">Font name assigned to the first rich text block.</param>
		/// <param name="defaultFontSize">Font size assigned to the first rich text block.</param>
		/// <exception cref="ArgumentNullException"><paramref name="range"/> is null.</exception>
		public static void SetRichTextFromHtml(ExcelRange range, string html, string defaultFontName, short defaultFontSize)
		{
			if (range == null)
			{
				throw new ArgumentNullException("range");
			}

			// Reset the cell value, just in case there is an existing RichText value.
			range.Value = "";

			// Sanity check for blank values; nothing to parse.
			if (String.IsNullOrEmpty(html))
			{
				range.IsRichText = false;
				return;
			}

			// Change all BR tags to line breaks. http://epplus.codeplex.com/discussions/238692/
			// Cells with line breaks aren't necessarily considered rich text, so this is performed
			// before parsing the HTML tags.
			html = LineBreakTagRegex.Replace(html, "\r\n");

			// Stays null until the first tag is matched; afterwards it is always the most recent block.
			ExcelRichText current = null;

			// Get all pairs of legitimate tags and the text that follows them. This loop will
			// only execute if there is at least one start or end tag.
			foreach (Match match in TagAndTextRegex.Matches(html))
			{
				if (current == null)
				{
					// On the very first match, set up the initial rich text object with
					// the defaults for the text BEFORE the match (may be 0-length).
					range.IsRichText = true;
					current = range.RichText.Add(CleanText(html.Substring(0, match.Index)));
					current.Size = defaultFontSize;
					current.FontName = defaultFontName;
				}

				// Get the tag and the block of text until the NEXT tag or end of string. HTML entities
				// are decoded so they reach RichText as normal characters (other than non-breaking
				// spaces, which are replaced with normal spaces because they break Excel).
				string tag = match.Groups[1].Value;
				string text = CleanText(match.Groups[2].Value);

				if (current.Text == "")
				{
					// The most recent rich text block wasn't actually used last time around, so update
					// the text and keep it as the "current" block. This happens with the first block if
					// the HTML starts with a tag, and later if tags come one right after the other.
					current.Text = text;
				}
				else
				{
					// The current rich text block has some text, so create a new one. RichText.Add()
					// automatically applies the settings from the previous block, other than vertical
					// alignment.
					current = range.RichText.Add(text);
				}

				// Override the settings based on the current tag, keep all other settings.
				SetStyleFromTag(tag, current);
			}

			if (current == null)
			{
				// No HTML tags were found, so treat this as a normal text value.
				range.IsRichText = false;
				range.Value = CleanText(html);
			}
			else if (String.IsNullOrEmpty(current.Text))
			{
				// Rich text was found, but the last node contains no text, so remove it. This can happen if,
				// say, the end of the string is an end tag or unsupported tag (common).
				range.RichText.Remove(current);

				// Failsafe -- the HTML may be just tags, no text, in which case there may be no rich text
				// directives that remain. If that is the case, turn off rich text and treat this like a
				// blank cell value.
				if (range.RichText.Count == 0)
				{
					range.IsRichText = false;
					range.Value = "";
				}
			}
		}

		/// <summary>
		/// Applies the style change implied by a single HTML tag to a rich text block.
		/// Start tags switch a setting on, end tags (leading "/") switch it off, and
		/// unrecognized tags leave the block untouched.
		/// </summary>
		/// <param name="tag">Tag name without angle brackets, e.g. "b" or "/sup"; case-insensitive.</param>
		/// <param name="settings">The rich text block to modify.</param>
		private static void SetStyleFromTag(string tag, ExcelRichText settings)
		{
			// Invariant lower-casing keeps "I" mapping to "i" regardless of the current culture.
			switch (tag.ToLowerInvariant())
			{
				case "b":
				case "strong":
					settings.Bold = true;
					break;
				case "i":
				case "em":
					settings.Italic = true;
					break;
				case "u":
					settings.UnderLine = true;
					break;
				case "s":
				case "strike":
					settings.Strike = true;
					break;
				case "sup":
					settings.VerticalAlign = ExcelVerticalAlignmentFont.Superscript;
					break;
				case "sub":
					settings.VerticalAlign = ExcelVerticalAlignmentFont.Subscript;
					break;
				case "/b":
				case "/strong":
					settings.Bold = false;
					break;
				case "/i":
				case "/em":
					settings.Italic = false;
					break;
				case "/u":
					settings.UnderLine = false;
					break;
				case "/s":
				case "/strike":
					settings.Strike = false;
					break;
				case "/sup":
				case "/sub":
					settings.VerticalAlign = ExcelVerticalAlignmentFont.None;
					break;
				default:
					// unsupported HTML, no style change
					break;
			}
		}

		/// <summary>
		/// Decodes HTML entities and replaces non-breaking spaces with ordinary spaces.
		/// </summary>
		/// <param name="s">The raw text taken from the HTML.</param>
		/// <returns>The decoded text with every U+00A0 replaced by a regular space.</returns>
		private static string CleanText(string s)
		{
			// Need to convert HTML entities (named or numbered) into actual Unicode characters
			s = System.Web.HttpUtility.HtmlDecode(s);
			// Remove any non-breaking spaces, kills Excel
			s = s.Replace("\u00A0", " ");
			return s;
		}
	}
	200	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences