HTML2Text
This code converts a string containing HTML tags and character entities into a text-only string. It collapses runs of multiple spaces and decodes the common named character entities, such as &amp;quot;, &amp;nbsp; and &amp;Auml;.
Original Author: unknown
Inputs
The original HTML String OrigHTML$
Returns
Text-only string
Code
' HTML2Text
' Purpose : Convert an HTML fragment or document into plain text.
' Input   : OrigHTML$ - the HTML source (passed ByVal, the caller's string is untouched).
' Returns : The text with tags removed, runs of spaces collapsed, source line
'           breaks folded into a single space, and character entities decoded.
'
' Behaviour:
'   * Everything up to and including the first ">" is discarded (kept from the
'     original implementation).
'     NOTE(review): this also drops any plain text that precedes the first tag -
'     confirm callers rely on it before changing.
'   * If a <body ...> tag is present, only the text between it and </body> is used.
'   * <p ...> and </div> become a blank line, <br>, <br/> and <br /> become a line
'     break; every other tag is removed (the text between tags is always kept).
'   * "&name;" and "&#NNN;" / "&#xHH;" are decoded; unknown entities are left as-is.
Public Function HTML2Text(ByVal OrigHTML$) As String
    Dim result As String      ' plain text built so far
    Dim ch As String          ' text to append for the current step
    Dim pos As Long           ' 1-based index of the next unread character
    Dim total As Long
    Dim markPos As Long
    Dim semiPos As Long
    Dim spacePos As Long

    ' Kept from the original: skip the leading tag (InStr = 0 leaves the text whole).
    OrigHTML$ = Mid$(OrigHTML$, InStr(OrigHTML$, ">") + 1)

    ' Restrict processing to the document body when there is one.
    ' NOTE(review): the "<body" / "</body" literals are reconstructed - they were
    ' stripped from the published listing; confirm against the original source.
    markPos = InStr(LCase$(OrigHTML$), "<body")
    If markPos > 0 Then
        ' Keep the <body ...> tag itself; the main loop removes it like any tag.
        OrigHTML$ = Mid$(OrigHTML$, markPos)
        markPos = InStr(LCase$(OrigHTML$), "</body")
        If markPos > 0 Then OrigHTML$ = Left$(OrigHTML$, markPos - 1)
    End If

    total = Len(OrigHTML$)
    pos = 1
    Do While pos <= total
        ch = Mid$(OrigHTML$, pos, 1)
        pos = pos + 1

        Select Case ch
            Case " "
                ' Collapse a run of spaces into the one already held in ch.
                ' Mid$ past the end returns "", which ends the loop.
                Do While Mid$(OrigHTML$, pos, 1) = " "
                    pos = pos + 1
                Loop

            Case vbCr, vbLf
                ' Swallow the second half of a CR/LF pair and any indentation.
                If Mid$(OrigHTML$, pos, 1) = vbLf Then pos = pos + 1
                Do While Mid$(OrigHTML$, pos, 1) = " "
                    pos = pos + 1
                Loop
                ' A source line break separates words just like a space does, so
                ' emit one space unless the output already ends in white space.
                Select Case Right$(result, 1)
                    Case "", " ", vbLf
                        ch = ""
                    Case Else
                        ch = " "
                End Select

            Case "<"
                ' A "<" with no closing ">" is dropped on its own.
                ch = ""
                markPos = InStr(pos, OrigHTML$, ">")
                If markPos > 0 Then
                    ch = HTML2Text_TagBreak(Mid$(OrigHTML$, pos, markPos - pos))
                    pos = markPos + 1
                End If

            Case "&"
                ' Treat "&...;" as an entity only when no space occurs before the
                ' ";". A missing space (InStr = 0) must not disable decoding.
                semiPos = InStr(pos, OrigHTML$, ";")
                If semiPos > 0 Then
                    spacePos = InStr(pos, OrigHTML$, " ")
                    If spacePos = 0 Or semiPos < spacePos Then
                        ch = HTML2Text_DecodeEntity(Mid$(OrigHTML$, pos, semiPos - pos))
                        pos = semiPos + 1
                    End If
                End If
        End Select

        result = result & ch
    Loop

    HTML2Text = result
End Function

' Returns the line-break text a tag stands for ("" for tags that are just removed).
' tagText is the text between "<" and ">", e.g. "p class=""x""" or "br /".
Private Function HTML2Text_TagBreak(ByVal tagText As String) As String
    Dim tagName As String
    Dim i As Long

    tagName = LCase$(Trim$(tagText))

    ' The tag name ends at the first white-space character (attributes follow).
    For i = 1 To Len(tagName)
        Select Case Mid$(tagName, i, 1)
            Case " ", vbTab, vbCr, vbLf
                Exit For
        End Select
    Next i
    tagName = Left$(tagName, i - 1)

    ' Accept the self-closing form: "br/" -> "br".
    If Len(tagName) > 1 And Right$(tagName, 1) = "/" Then
        tagName = Left$(tagName, Len(tagName) - 1)
    End If

    Select Case tagName
        Case "p", "/div"
            HTML2Text_TagBreak = vbCrLf & vbCrLf
        Case "br"
            HTML2Text_TagBreak = vbCrLf
        Case Else
            HTML2Text_TagBreak = ""
    End Select
End Function

' Decodes one entity name (the text between "&" and ";").
' Returns the decoded character, or "&name;" unchanged when the name is unknown.
Private Function HTML2Text_DecodeEntity(ByVal entityName As String) As String
    ' Names of the Latin-1 entities in code-point order: item i is character 160 + i.
    Const LATIN1_NAMES As String = _
        "nbsp iexcl cent pound curren yen brvbar sect uml copy ordf laquo not shy reg macr " & _
        "deg plusmn sup2 sup3 acute micro para middot cedil sup1 ordm raquo frac14 frac12 frac34 iquest " & _
        "Agrave Aacute Acirc Atilde Auml Aring AElig Ccedil Egrave Eacute Ecirc Euml Igrave Iacute Icirc Iuml " & _
        "ETH Ntilde Ograve Oacute Ocirc Otilde Ouml times Oslash Ugrave Uacute Ucirc Uuml Yacute THORN szlig " & _
        "agrave aacute acirc atilde auml aring aelig ccedil egrave eacute ecirc euml igrave iacute icirc iuml " & _
        "eth ntilde ograve oacute ocirc otilde ouml divide oslash ugrave uacute ucirc uuml yacute thorn yuml"
    Dim names() As String
    Dim i As Long

    If Left$(entityName, 1) = "#" Then
        HTML2Text_DecodeEntity = HTML2Text_DecodeNumeric(entityName)
        Exit Function
    End If

    ' Entity names are case-sensitive ("Auml" <> "auml"), so compare in binary
    ' mode regardless of the module's Option Compare setting.
    If StrComp(entityName, "amp", vbBinaryCompare) = 0 Then
        HTML2Text_DecodeEntity = "&"
    ElseIf StrComp(entityName, "quot", vbBinaryCompare) = 0 Then
        HTML2Text_DecodeEntity = """"
    ElseIf StrComp(entityName, "lt", vbBinaryCompare) = 0 Then
        HTML2Text_DecodeEntity = "<"
    ElseIf StrComp(entityName, "gt", vbBinaryCompare) = 0 Then
        HTML2Text_DecodeEntity = ">"
    ElseIf StrComp(entityName, "nbsp", vbBinaryCompare) = 0 Then
        ' Kept from the original: a non-breaking space becomes an ordinary space.
        HTML2Text_DecodeEntity = " "
    ElseIf StrComp(entityName, "brvbar", vbBinaryCompare) = 0 Then
        ' Kept from the original: broken bar is approximated with ASCII "|".
        HTML2Text_DecodeEntity = "|"
    ElseIf StrComp(entityName, "plusm", vbBinaryCompare) = 0 Then
        ' Misspelt name accepted by the original; kept as an alias of "plusmn".
        HTML2Text_DecodeEntity = ChrW$(177)
    Else
        ' Unknown names fall through with the entity text left intact.
        HTML2Text_DecodeEntity = "&" & entityName & ";"
        names = Split(LATIN1_NAMES, " ")
        For i = LBound(names) To UBound(names)
            If StrComp(entityName, names(i), vbBinaryCompare) = 0 Then
                HTML2Text_DecodeEntity = ChrW$(160 + i)
                Exit For
            End If
        Next i
    End If
End Function

' Decodes a numeric character reference body such as "#233" or "#xE9".
' Returns "&<body>;" unchanged when the digits are invalid or out of range.
Private Function HTML2Text_DecodeNumeric(ByVal entityName As String) As String
    Dim digits As String
    Dim radix As Long
    Dim codePoint As Long
    Dim digitValue As Long
    Dim i As Long

    ' Default result: leave the reference as it was written.
    HTML2Text_DecodeNumeric = "&" & entityName & ";"

    digits = LCase$(Mid$(entityName, 2))
    If Left$(digits, 1) = "x" Then
        radix = 16
        digits = Mid$(digits, 2)
    Else
        radix = 10
    End If
    If Len(digits) = 0 Then Exit Function

    For i = 1 To Len(digits)
        digitValue = InStr(1, "0123456789abcdef", Mid$(digits, i, 1), vbBinaryCompare) - 1
        If digitValue < 0 Or digitValue >= radix Then Exit Function
        codePoint = codePoint * radix + digitValue
        ' Stop at the Unicode maximum (U+10FFFF); this also prevents Long overflow.
        If codePoint > 1114111 Then Exit Function
    Next i

    Select Case codePoint
        Case 160
            ' Same treatment as "&nbsp;".
            HTML2Text_DecodeNumeric = " "
        Case 1 To 55295, 57344 To 65535
            HTML2Text_DecodeNumeric = ChrW$(codePoint)
        Case 65536 To 1114111
            ' VB strings are UTF-16: encode as a surrogate pair.
            codePoint = codePoint - 65536
            HTML2Text_DecodeNumeric = ChrW$(55296 + codePoint \ 1024) & ChrW$(56320 + (codePoint Mod 1024))
    End Select
End Function
Loading Comments ...
Comments
No comments have been added for this post.
You must be logged in to make a comment.