.net去除html標簽代碼 

          /// <summary>
          /// Strips HTML markup from <paramref name="Htmlstring"/> and decodes a small set of
          /// character entities, keeping line breaks as literal <c>&lt;br&gt;</c> tags.
          /// </summary>
          /// <param name="Htmlstring">The HTML fragment to clean. May be null or empty.</param>
          /// <returns>
          /// The cleaned text. <c>&lt;br&gt;</c>, <c>&lt;br/&gt;</c> and <c>&lt;br /&gt;</c> are kept
          /// (normalised to <c>&lt;br&gt;</c>); every other tag is removed. An empty string is
          /// returned for null or empty input.
          /// </returns>
          /// <remarks>
          /// This is a regex-based cleaner, not an HTML sanitizer: entity decoding can re-introduce
          /// '&lt;' and '&gt;' characters, so the result must still be encoded before being written
          /// into a page if the input is untrusted.
          /// </remarks>
          public string NoHTML(string Htmlstring)
          {
              // Regex.Replace throws on null input; treat "nothing in" as "nothing out".
              if (string.IsNullOrEmpty(Htmlstring))
              {
                  return string.Empty;
              }

              // Remove script blocks. Singleline lets '.' cross newlines so multi-line scripts are
              // removed together with their content instead of leaking the script source as text.
              Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

              // Remove every tag except a bare <br>. The negative lookahead replaces the former
              // "$br$" placeholder round-trip, whose restore pattern never matched because an
              // unescaped '$' is an end-of-input anchor in a regular expression.
              Htmlstring = Regex.Replace(Htmlstring, @"<(?!br\s*/?>)[^>]+>", "", RegexOptions.IgnoreCase);
              // Normalise <BR>, <br/> and <br /> to a single spelling.
              Htmlstring = Regex.Replace(Htmlstring, @"<br\s*/?>", "<br>", RegexOptions.IgnoreCase);

              // A line break followed by further whitespace becomes one <br>. This also consumes
              // every "\r\n" pair, because '\n' is whitespace following '\r'.
              Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "<br>", RegexOptions.IgnoreCase);

              // Clean up comment delimiters left behind by comments that contained a '>'.
              Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
              Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

              // Decode entities in ONE pass so that already-decoded text is never decoded again
              // (e.g. "&amp;lt;" must become "&lt;", not "<"). "&nbsp;" is deliberately not in the
              // list and is left untouched, as in the original implementation.
              Htmlstring = Regex.Replace(
                  Htmlstring,
                  @"&(quot|amp|lt|gt|iexcl|cent|pound|copy|#\d+);",
                  new MatchEvaluator(DecodeEntity),
                  RegexOptions.IgnoreCase);

              // NOTE: the original ended with three string.Replace calls for "<", ">" and "\r\n"
              // whose results were discarded (strings are immutable), so they had no effect.
              // They are removed rather than "fixed": stripping '<' and '>' here would destroy the
              // <br> tags kept above and the characters just decoded from &lt; / &gt;.
              return Htmlstring;
          }

          /// <summary>
          /// Maps one matched character entity to its replacement text.
          /// </summary>
          /// <param name="entity">A match whose group 1 is the entity name or "#" plus decimal digits.</param>
          /// <returns>
          /// The decoded character for the supported entities; an empty string for any other
          /// numeric reference (those are dropped, as in the original implementation).
          /// </returns>
          private static string DecodeEntity(Match entity)
          {
              switch (entity.Groups[1].Value.ToLowerInvariant())
              {
                  case "quot":
                  case "#34":
                      return "\"";
                  case "amp":
                  case "#38":
                      return "&";
                  case "lt":
                  case "#60":
                      return "<";
                  case "gt":
                  case "#62":
                      return ">";
                  case "iexcl":
                  case "#161":
                      return "\u00a1";
                  case "cent":
                  case "#162":
                      return "\u00a2";
                  case "pound":
                  case "#163":
                      return "\u00a3";
                  case "copy":
                  case "#169":
                      return "\u00a9";
                  default:
                      // Only unlisted numeric references reach here; they are removed.
                      return "";
              }
          }

          posted on 2009-04-17 12:03 sanmao 閱讀(741) 評論(1)  編輯  收藏

          FeedBack:
          # re: .net去除html標簽代碼[未登錄]
          2014-05-17 09:20 | bobby
          正則表達式在這個程度上已經不夠了,比如清除未知的屬性,和標記,建議使用wuxiu.safehelper  回復  更多評論
            

          只有注冊用戶登錄后才能發(fā)表評論。


          網站導航:
           

          常用鏈接

          留言簿(5)

          隨筆分類

          隨筆檔案

          搜索

          •  

          最新評論

          閱讀排行榜

          評論排行榜

          主站蜘蛛池模板: 深水埗区| 沈阳市| 墨江| 乌兰浩特市| 楚雄市| 武强县| 社旗县| 迭部县| 威远县| 扬中市| 北碚区| 达日县| 六安市| 仁化县| 聂拉木县| 阿拉善盟| 南康市| 齐河县| 济阳县| 日照市| 泰宁县| 肇源县| 孙吴县| 东光县| 霍州市| 神池县| 横山县| 蕲春县| 定兴县| 峨边| 宜阳县| 庐江县| 灵台县| 普安县| 辉县市| 同仁县| 准格尔旗| 织金县| 克拉玛依市| 吉隆县| 七台河市|