C++ 高性能爬虫

码力码力我爱你 2024-06-17 17:35:21 阅读 97

main.cpp

// MainThread.cpp
//
// Implementation of the crawler's controlling UI thread (MainThread) together
// with the two free functions executed by the worker threads it spawns:
//   - DownloadFile: fetches one page, extracts title/text, stores it.
//   - FindURL:      scans a saved page for links and queues the new ones.
// The worker entry points are free functions because AfxBeginThread cannot
// take a non-static member function as its controlling function.

#include "stdafx.h"
#include "CNetCrawler.h"
#include "afxmt.h"
#include "DownloadData.h"
#include "MainThread.h"
#include "ProjectDlg.h"
#include "CNetCrawlerDlg.h"
#include <afxinet.h> // HTTP requests and the other WinInet-based helpers

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

extern CCNetCrawlerDlg *pDlg; // main dialog window
extern bool ThreadPause;      // true while crawling is paused

/////////////////////////////////////////////////////////////////////////////
// MainThread

// DECLARE_DYNCREATE in the class declaration lets the framework create
// MainThread objects dynamically at run time.
IMPLEMENT_DYNCREATE(MainThread, CWinThread)

// Constructs the thread object in the "not stopped" state.
MainThread::MainThread()
{
    m_bDone = false;
}

MainThread::~MainThread()
{
}

// Per-thread initialisation: publishes the project settings to the shared
// data area, normalises the start URL, disables the "new project" button and
// then runs the whole crawl via Run().
//
// NOTE(review): Run() blocks until the crawl is over and calls ExitInstance()
// itself; this function then returns TRUE. Whether the thread is expected to
// stay alive afterwards cannot be determined from this file - confirm.
BOOL MainThread::InitInstance()
{
    // Start file id, maximum number of worker threads and the save directory.
    m_DownData.SetPro(m_FileId, m_ThreadNum, m_LocalDir);

    // Normalise the start address; prepend the scheme when it is missing.
    m_BeginURL.MakeLower();
    if (m_BeginURL.Find(_T("http://")) == -1)
        str_BeginURL = _T("http://") + m_BeginURL;
    else
        str_BeginURL = m_BeginURL;

    str_ProjectName = m_ProjectName;

    // No second project may be started while this one is running.
    CWnd *button = pDlg->GetDlgItem(IDC_BUTTON_NEW);
    if (button != NULL)
        button->EnableWindow(FALSE);

    Run(str_BeginURL);
    return TRUE;
}

// Restores the dialog to its idle state (re-enables "new project", disables
// "pause/continue" and "stop") and clears the pause flag so that the next
// project starts from the initial values.
int MainThread::ExitInstance()
{
    CWnd *button = pDlg->GetDlgItem(IDC_BUTTON_NEW);
    if (button != NULL)
        button->EnableWindow(TRUE);

    ThreadPause = false;
    pDlg->m_active = true;             // back to the initial value
    pDlg->m_pause.EnableWindow(false); // disable "pause/continue"
    pDlg->m_stop.EnableWindow(false);  // disable "stop"

    return CWinThread::ExitInstance();
}

BEGIN_MESSAGE_MAP(MainThread, CWinThread)
    //{{AFX_MSG_MAP(MainThread)
    // NOTE - the ClassWizard will add and remove mapping macros here.
    //}}AFX_MSG_MAP
END_MESSAGE_MAP()

/*=============================== helpers ==================================*/

// Extracts the target of the first href= attribute found in `line` into
// `url` (whitespace-trimmed). Returns false when the line has no usable href.
static bool ExtractFirstHref(CString line, CString &url)
{
    const int markPos = line.Find(_T("href="));
    if (markPos == -1)
        return false;

    // Drop "href", then an optional '=' and any blanks around it.
    line = line.Mid(markPos + 4);
    line.TrimLeft();
    if (!line.IsEmpty() && line[0] == _T('='))
        line = line.Mid(1);
    line.TrimLeft();
    if (line.IsEmpty())
        return false;

    const TCHAR first = line[0];
    if (first == _T('"') || first == _T('\''))
    {
        // Quoted value: everything up to the matching quote.
        const int closing = line.Find(first, 1);
        if (closing == -1 || closing == 1) // unterminated or empty
            return false;
        url = line.Mid(1, closing - 1);
    }
    else
    {
        // Unquoted value: ends at the first '>' or blank, whichever is first.
        const int gt = line.Find(_T('>'));
        const int sp = line.Find(_T(' '));
        int end;
        if (gt == -1)
            end = sp;
        else if (sp == -1)
            end = gt;
        else
            end = (gt < sp) ? gt : sp;
        if (end == -1)
            return false;
        url = line.Left(end);
    }

    url.TrimLeft();
    url.TrimRight();
    return !url.IsEmpty();
}

// Returns true when `url` looks like an HTML page: it contains one of the
// known page extensions or ends with '/'.
static bool LooksLikePage(const CString &url)
{
    // ".asp" also covers ".aspx"; ".htm" also covers ".html" and ".shtml".
    static LPCTSTR const kPageMarkers[] =
        { _T(".asp"), _T(".php"), _T(".jsp"), _T(".htm") };

    for (int n = 0; n < (int)(sizeof(kPageMarkers) / sizeof(kPageMarkers[0])); ++n)
    {
        if (url.Find(kPageMarkers[n]) != -1)
            return true;
    }
    return !url.IsEmpty() && url[url.GetLength() - 1] == _T('/');
}

// Closes and frees a request/connection pair and nulls both pointers, so a
// later cleanup pass can never free the same object twice.
static void ReleaseHttp(CHttpFile *&pFile, CHttpConnection *&pServer)
{
    if (pFile != NULL)
    {
        pFile->Close();
        delete pFile;
        pFile = NULL;
    }
    if (pServer != NULL)
    {
        pServer->Close();
        delete pServer;
        pServer = NULL;
    }
}

// Connects to server:port, sends a GET for `object` and returns the HTTP
// status code. The created objects are handed back through pServer/pFile
// as soon as they exist, so the caller can release them if a later step
// throws.
static DWORD SendGet(CInternetSession &session, const CString &server,
                     const CString &object, INTERNET_PORT port,
                     CHttpConnection *&pServer, CHttpFile *&pFile)
{
    pServer = session.GetHttpConnection(server, port);
    pFile = pServer->OpenRequest(CHttpConnection::HTTP_VERB_GET,
                                 object, NULL, 1, NULL, NULL);
    pFile->AddRequestHeaders(_T("Accept: text/*\r\nUser-Agent: MFC\r\n"));
    pFile->SendRequest();

    DWORD status = 0;
    pFile->QueryInfoStatusCode(status);
    return status;
}

// Requests `url`, following at most one redirect. On return pFile is an open
// response with status 200. Throws CInternetException* when the URL is not a
// plain HTTP URL, when a redirect carries no usable Location header, or when
// the final status is not 200.
static void OpenPage(CInternetSession &session, const CString &url,
                     CHttpConnection *&pServer, CHttpFile *&pFile)
{
    CString strServerName; // e.g. 210.48.16.168
    CString strObject;     // e.g. /ss/aa.jpg
    INTERNET_PORT nPort = 0;
    DWORD dwServiceType = 0;

    if (!AfxParseURL(url, dwServiceType, strServerName, strObject, nPort) ||
        dwServiceType != INTERNET_SERVICE_HTTP)
    {
        THROW(new CInternetException(dwServiceType));
    }

    DWORD status = SendGet(session, strServerName, strObject, nPort, pServer, pFile);

    if (status == HTTP_STATUS_MOVED ||
        status == HTTP_STATUS_REDIRECT ||
        status == HTTP_STATUS_REDIRECT_METHOD)
    {
        CString strNewLocation;
        pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strNewLocation);

        int nPlace = strNewLocation.Find(_T("Location: "));
        if (nPlace == -1)
            THROW(new CInternetException(status));

        // Keep only the header value: from after "Location: " to end of line.
        strNewLocation = strNewLocation.Mid(nPlace + 10);
        nPlace = strNewLocation.Find(_T('\n'));
        if (nPlace > 0)
            strNewLocation = strNewLocation.Left(nPlace);
        strNewLocation.TrimRight(); // headers are CRLF separated: drop the '\r'

        // Done with the redirecting site. The pointers are nulled here so the
        // caller's cleanup does not free them again if a throw follows.
        ReleaseHttp(pFile, pServer);

        if (!AfxParseURL(strNewLocation, dwServiceType, strServerName, strObject, nPort))
            THROW(new CInternetException(status));
        if (dwServiceType != INTERNET_SERVICE_HTTP)
            THROW(new CInternetException(status));

        status = SendGet(session, strServerName, strObject, nPort, pServer, pFile);
    }

    if (status != HTTP_STATUS_OK)
        THROW(new CInternetException(status));
}

// Replaces the three HTML entities the crawler cares about. Each entity is
// replaced over the whole string, independently of the others.
static void DecodeBasicEntities(CString &text)
{
    text.Replace(_T("&nbsp;"), _T(" "));
    text.Replace(_T("&gt;"), _T(">"));
    text.Replace(_T("&lt;"), _T("<"));
}

// Forward declaration: ExtractPage hands the saved raw page to FindURL.
void FindURL(CString s, MainThread *ptr);

// Reads the response in pHttpFile and
//   1. writes "<url>\n<title>\n<filtered text>" to the file named by
//      m_DownData.GetFileName(),
//   2. if the URL queue still has room, mirrors the raw page to
//      "<file>.tmp" and lets FindURL queue the links found in it,
//   3. stores url/title/text through MainThread::InsertDB.
// Returns false when the output file cannot be created or re-opened.
// MFC exceptions raised by the file or network calls propagate to the caller.
static bool ExtractPage(MainThread *ptr, CHttpFile *pHttpFile, const CString &URL)
{
    CString str_FileName;
    ptr->m_DownData.GetFileName(str_FileName);

    CStdioFile LocalFile;
    if (!LocalFile.Open(str_FileName,
                        CFile::modeCreate | CFile::modeWrite | CFile::typeText))
    {
        return false;
    }

    // The raw copy is only needed for link extraction, which is pointless
    // once the queue is full. Failing to create it just disables that step.
    CStdioFile tempLocalFile;
    bool keepTemp = !ptr->m_DownData.IsFull();
    if (keepTemp)
    {
        keepTemp = tempLocalFile.Open(str_FileName + _T(".tmp"),
                                      CFile::modeCreate | CFile::modeWrite | CFile::typeText) != FALSE;
    }

    // Line 1 of both files is the page URL (FindURL uses it as the base URL).
    if (keepTemp)
        tempLocalFile.WriteString(URL + _T("\n"));
    LocalFile.WriteString(URL + _T("\n"));

    CString line;
    bool isUTF8 = false; // set when a UTF-8 charset declaration is seen

    // ---- 1. title ---------------------------------------------------------
    while (pHttpFile->ReadString(line))
    {
        if (line.Find(_T("charset=utf-8")) != -1 || line.Find(_T("charset=UTF-8")) != -1)
            isUTF8 = true;

        if (keepTemp)
            tempLocalFile.WriteString(line + _T("\n"));

        int titlePos = line.Find(_T("<TITLE>"));
        if (titlePos == -1) titlePos = line.Find(_T("<Title>"));
        if (titlePos == -1) titlePos = line.Find(_T("<title>"));

        if (titlePos != -1)
        {
            titlePos += 7; // skip the opening tag

            // Search for the closing tag *after* the opening tag; an earlier
            // "</" on the same line must not be mistaken for it.
            int closePos = line.Find(_T("</"), titlePos);
            if (closePos != -1)
            {
                LocalFile.WriteString(line.Mid(titlePos, closePos - titlePos) + _T("\n"));
            }
            else
            {
                // Title continues on the following lines.
                LocalFile.WriteString(line.Mid(titlePos));
                while (pHttpFile->ReadString(line))
                {
                    if (keepTemp)
                        tempLocalFile.WriteString(line + _T("\n"));

                    closePos = line.Find(_T("</"));
                    if (closePos == -1)
                    {
                        LocalFile.WriteString(line);
                        continue;
                    }
                    LocalFile.WriteString(line.Left(closePos) + _T("\n"));
                    break;
                }
            }
            break;
        }

        // End of <head> without a title: the page is untitled.
        if (line.Find(_T("</HEAD>")) != -1 ||
            line.Find(_T("</Head>")) != -1 ||
            line.Find(_T("</head>")) != -1)
        {
            LocalFile.WriteString(_T("Untitled Page\n"));
            break;
        }
    }

    // ---- 2. skip up to the end of the <body ...> tag ----------------------
    bool body = false;
    while (pHttpFile->ReadString(line))
    {
        if (keepTemp)
            tempLocalFile.WriteString(line + _T("\n"));

        if (line.Find(_T("<BODY")) != -1 ||
            line.Find(_T("<body")) != -1 ||
            line.Find(_T("<Body")) != -1)
        {
            body = true;
        }
        if (body && line.Find(_T('>')) != -1)
            break;
    }

    // ---- 3. filter the body text ------------------------------------------
    UINT openAngles = 0; // '<' nesting carried across lines by TrimString
    UINT openBraces = 0; // '{' nesting carried across lines by TrimString
    while (pHttpFile->ReadString(line))
    {
        if (keepTemp)
            tempLocalFile.WriteString(line + _T("\n"));

        // TrimString edits the buffer in place; keep lines with Chinese text.
        LPTSTR p = line.GetBuffer(1024);
        ptr->TrimString(p, openAngles, openBraces, TRUE);
        line.ReleaseBuffer();

        if (line.IsEmpty())
            continue;

        DecodeBasicEntities(line);
        line.TrimLeft();
        line.TrimRight();
        if (!line.IsEmpty())
            LocalFile.WriteString(line + _T(" "));
    }
    LocalFile.Close();

    // ---- 4. read the result back and store it -----------------------------
    if (!LocalFile.Open(str_FileName, CFile::modeRead | CFile::typeText))
    {
        if (keepTemp)
            tempLocalFile.Close();
        return false;
    }

    CString dbTitle(_T("")), dbUrl(_T("")), dbText(_T(""));
    LocalFile.ReadString(dbUrl);
    LocalFile.ReadString(dbTitle);
    CString tempLine;
    while (LocalFile.ReadString(tempLine))
    {
        dbText.Append(tempLine);
        dbText.Append(_T(" "));
    }
    LocalFile.Close();

    if (isUTF8)
    {
        // Convert from separate copies so input and output never alias.
        CString utf8Title = dbTitle;
        ptr->UTF8ToGB2312(utf8Title.GetBuffer(), utf8Title.GetLength(), dbTitle);
        utf8Title.ReleaseBuffer();

        CString utf8Text = dbText;
        ptr->UTF8ToGB2312(utf8Text.GetBuffer(), utf8Text.GetLength(), dbText);
        utf8Text.ReleaseBuffer();
    }

    const int endPos = dbTitle.Find(_T("</"));
    if (endPos > 0)
        dbTitle = dbTitle.Mid(0, endPos);

    ptr->InsertDB(dbUrl, dbTitle, dbText);

    // ---- 5. harvest links from the raw copy -------------------------------
    if (keepTemp)
    {
        tempLocalFile.Close();
        FindURL(str_FileName + _T(".tmp"), ptr);
        // The temporary file is intentionally left on disk:
        // DeleteFile(str_FileName + _T(".tmp"));
    }
    return true;
}

// Starts one worker if the shared data area grants a thread slot. If the
// thread cannot be created, the slot is returned; otherwise the running-thread
// count would never drop back to zero and Run() would never finish.
static void StartWorker(MainThread *owner);

/*============================ global functions ============================*/

/*-----------------------------------------------------------------------------
Purpose:    extract links from a page that has been saved to a local file and
            add the ones not yet known to the shared URL queue.
Parameters: s    path of the saved page; its first line must be the page URL,
                 which is used as the base for relative links.
            ptr  controlling thread, used to reach the shared data area.
Notes:      - stops as soon as the URL queue reports full;
            - only the first href= on each line is examined;
            - matching is case-sensitive ("href=");
            - mailto: links, links containing '#', and links that do not look
              like an HTML page are ignored.
Called by:  every worker thread (DownloadFile).
-----------------------------------------------------------------------------*/
void FindURL(CString s, MainThread *ptr)
{
    CStdioFile fin;
    if (!fin.Open(s, CFile::modeRead))
        return;

    CString str_BaseURL;
    if (!fin.ReadString(str_BaseURL))
        return; // empty file

    CString str_Line, str_URL;
    while (fin.ReadString(str_Line))
    {
        if (ptr->m_DownData.IsFull())
            break;

        if (!ExtractFirstHref(str_Line, str_URL))
            continue;

        if (str_URL.Find(_T("mailto:")) != -1) continue; // e-mail address
        if (str_URL.Find(_T('#')) != -1) continue;       // in-page anchor
        if (!LooksLikePage(str_URL)) continue;
        if (str_URL == _T("http://")) continue;

        // Resolve anything that is not already an absolute http: URL against
        // the page URL. ICU_BROWSER_MODE leaves everything after '#'/'?' as is.
        if (str_URL.Find(_T("http:")) == -1)
        {
            TCHAR combined[INTERNET_MAX_URL_LENGTH];
            DWORD combinedLen = INTERNET_MAX_URL_LENGTH;
            if (!InternetCombineUrl(str_BaseURL, str_URL, combined,
                                    &combinedLen, ICU_BROWSER_MODE))
            {
                continue;
            }
            str_URL = combined;
        }

        //if(str_URL.Find(ptr->str_Confine)==-1)continue;
        if (!ptr->m_DownData.IsExisted(str_URL))
            ptr->m_DownData.AddURL(str_URL);
    }
    fin.Close();
}

/*-----------------------------------------------------------------------------
Purpose:    controlling function of a worker thread: take one URL from the
            shared queue, download it, extract and store its content, and
            queue the links it contains.
Parameter:  pParam  the controlling MainThread*, giving access to the shared
                    data area.
Returns:    1 when the page was processed, 0 otherwise.
Guarantees: on every exit path the HTTP objects are released exactly once and
            the thread slot is returned with DeleThread(); MFC exceptions
            never leave this function.
-----------------------------------------------------------------------------*/
UINT DownloadFile(LPVOID pParam)
{
    MainThread *ptr = (MainThread *)pParam;

    CString URL;
    if (!ptr->m_DownData.GetCurURL(URL))
    {
        ptr->m_DownData.DeleThread();
        return 0;
    }

    // Direct connection, no proxy.
    CInternetSession MyConnect(_T("Microsoft MFC APP"), 1, INTERNET_OPEN_TYPE_DIRECT);
    CHttpConnection *pServer = NULL;
    CHttpFile *pHttpFile = NULL;

    bool succeeded = false;
    try
    {
        OpenPage(MyConnect, URL, pServer, pHttpFile);
        succeeded = ExtractPage(ptr, pHttpFile, URL);
    }
    catch (CException *pEx)
    {
        // Network (CInternetException) and file (CFileException) failures
        // both mean: this URL could not be processed.
        pEx->Delete();
        succeeded = false;
    }

    ReleaseHttp(pHttpFile, pServer);
    MyConnect.Close();

    if (!succeeded)
    {
        ptr->m_DownData.DeleThread();
        pDlg->Add(URL + _T("\r\n"), 0);
        return 0;
    }

    pDlg->Add(URL + _T("\r\n"), 1); // status display
    ptr->m_DownData.DeleThread();
    return 1;
}

static void StartWorker(MainThread *owner)
{
    if (!owner->m_DownData.AddThread())
        return;
    if (AfxBeginThread(DownloadFile, owner) == NULL)
        owner->m_DownData.DeleThread();
}

/*==========================================================================*/
/////////////////////////////////////////////////////////////////////////////
// MainThread message handlers

// Drives the crawl: queues the root URL, then starts a worker every 100 ms
// (unless paused or no thread slot is free) until the thread is marked done
// or the queue is empty and no worker is running. Finally reports completion
// and restores the dialog through ExitInstance().
void MainThread::Run(CString &str_Begin)
{
    m_DownData.AddURL(str_Begin);
    StartWorker(this); // visit the root URL

    while (!m_bDone && !(m_DownData.IsEmpty() && m_DownData.GetCurThread() == 0))
    {
        Sleep(100);
        if (ThreadPause)
            continue;
        StartWorker(this);
    }

    Sleep(1000);
    AfxMessageBox(_T("任务完成!"));
    ExitInstance();
}

/*=============================================================================
Purpose:    strip HTML markup from a string, in place.
Parameters: pszBuffer  NUL-terminated string to filter; overwritten with the
                       result.
            w          number of currently open '<'; carried between calls so
                       that tags spanning several lines are handled.
            k          number of currently open '{'; carried between calls.
            chinese    when true, the result is emptied unless the input
                       contained at least one character with a negative value.
Algorithm:  text between { } is dropped unconditionally (treated as
            script/style bodies); text between < > is dropped as markup. The
            first character kept after a tag is preceded by one blank so that
            adjacent text runs stay separated.
Returns:    always true.
NOTE(review): the "contains Chinese" test relies on TCHAR being a signed
            8-bit type (lead bytes of double-byte characters are negative);
            it would never fire in a Unicode build - confirm the build
            configuration.
=============================================================================*/
bool MainThread::TrimString(LPTSTR pszBuffer, UINT &w, UINT &k, bool chinese)
{
    LPTSTR pszSource = pszBuffer;
    LPTSTR pszDest = pszBuffer;
    bool sawChinese = false;
    bool afterTag = false;

    for (; *pszSource != '\0'; ++pszSource)
    {
        const TCHAR c = *pszSource;

        if (!sawChinese && c < 0)
            sawChinese = true;

        if (c == '{')
            k++;

        if (k == 0) // not inside { }
        {
            if (w != 0) // inside < >
            {
                if (c == '>')
                    w--;
                else if (c == '<')
                    w++;
            }
            else if (c == '<')
            {
                w++;
                afterTag = true;
            }
            else
            {
                if (afterTag)
                {
                    *pszDest++ = ' ';
                    afterTag = false;
                }
                *pszDest++ = c;
            }
        }

        // Guarded: an unmatched '}' must not wrap the unsigned counter, which
        // would make every following character look "inside braces".
        if (c == '}' && k > 0)
            k--;
    }

    if (chinese && !sawChinese)
        *pszBuffer = '\0'; // no Chinese character on this line: drop it
    else
        *pszDest = '\0';
    return true;
}

// Inserts one crawled page into table `crawler` of the Jet database web.mdb
// (resolved relative to the current directory) through ADO.
//
// All three values are embedded in the SQL text as string literals, so every
// single quote in them is doubled first; page content is untrusted input.
// Database errors are traced and otherwise ignored: a failed insert must not
// abort the crawl.
void MainThread::InsertDB(CString dbUrl, CString dbTitle, CString dbText)
{
    dbUrl.Replace(_T("'"), _T("''"));
    dbTitle.Replace(_T("'"), _T("''"));
    dbText.Replace(_T("'"), _T("''"));

    CString sql = _T("INSERT INTO crawler(Url,Title,Content) VALUES ('") + dbUrl +
                  _T("','") + dbTitle + _T("','") + dbText + _T("')");

    // Balance CoInitialize only when it succeeded on this thread.
    const HRESULT hrInit = ::CoInitialize(NULL);

    try
    {
        // Scoped so the connection is released before CoUninitialize.
        _ConnectionPtr pConn(__uuidof(Connection));
        pConn->ConnectionString = "Provider=MIcrosoft.Jet.OLEDB.4.0;Data source=web.mdb";
        pConn->Open("", "", "", adConnectUnspecified);

        try
        {
            _variant_t RecordsAffected;
            pConn->Execute(_bstr_t((LPCTSTR)sql), &RecordsAffected, adCmdText);
        }
        catch (_com_error &e)
        {
            TRACE(_T("InsertDB: INSERT failed: %s\n"), (LPCTSTR)e.ErrorMessage());
        }

        pConn->Close();
    }
    catch (_com_error &e)
    {
        TRACE(_T("InsertDB: database unavailable: %s\n"), (LPCTSTR)e.ErrorMessage());
    }

    if (SUCCEEDED(hrInit))
        ::CoUninitialize();
}

// Converts nLen bytes of UTF-8 text at pText to the system ANSI code page
// (GB2312/GBK on a Simplified-Chinese system) and stores the result in
// strOutput. A leading UTF-8 byte-order mark is skipped.
//
// The conversion goes through UTF-16, so UTF-8 sequences of every length are
// handled and no byte outside [pText, pText + nLen) is read. The result is
// built in a temporary, so pText may point into strOutput's own buffer.
// strOutput is left unchanged when nLen <= 0 or the conversion fails.
void MainThread::UTF8ToGB2312(char *pText, int nLen, CString &strOutput)
{
    if (pText == NULL || nLen <= 0)
        return;

    // Skip "EF BB BF" if present.
    if (nLen >= 3 && memcmp(pText, "\xef\xbb\xbf", 3) == 0)
    {
        pText += 3;
        nLen -= 3;
    }
    if (nLen == 0)
    {
        strOutput.Empty();
        return;
    }

    // UTF-8 -> UTF-16
    const int wideLen = ::MultiByteToWideChar(CP_UTF8, 0, pText, nLen, NULL, 0);
    if (wideLen <= 0)
        return;
    CStringW wide;
    LPWSTR wideBuf = wide.GetBuffer(wideLen);
    ::MultiByteToWideChar(CP_UTF8, 0, pText, nLen, wideBuf, wideLen);
    wide.ReleaseBuffer(wideLen);

    // UTF-16 -> ANSI code page
    const int ansiLen = ::WideCharToMultiByte(CP_ACP, 0, wide, wideLen,
                                              NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
        return;
    CString converted;
    char *ansiBuf = converted.GetBuffer(ansiLen);
    ::WideCharToMultiByte(CP_ACP, 0, wide, wideLen, ansiBuf, ansiLen, NULL, NULL);
    converted.ReleaseBuffer(ansiLen);

    strOutput = converted;
}


创作不易,小小的支持一下吧!



声明

本文内容仅代表作者观点,或转载于其他网站,本站不以此文作为商业用途。
如有涉及侵权,请联系本站进行删除。
转载本站原创文章,请注明来源及作者。