csharp:Optical Character Recognition
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Drawing;
using System.IO;
using System.Drawing.Imaging;
using MODI;//Microsoft Office Document Imaging
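// Prerequisite: install the Microsoft Office Document Imaging (MODI) component from the Office setup media.
// It is not included in a default Office installation; add it through the Office feature list.
// Installation notes: http://support.microsoft.com/kb/982760
// COM reference to add: Microsoft Office Document Imaging 12.0 Type Library (Office 2007)
// or Microsoft Office Document Imaging 11.0 Type Library (Office 2003).
// Simplified Chinese OCR engine: http://www.microsoft.com/downloads/thankyou.aspx?familyId=dd172063-9517-41d8-82af-29c38f7437b6&displayLang=zh-hk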
namespace ToText
{
/// <summary>
/// Optical Character Recognition (OCR) helpers built on Microsoft Office Document Imaging (MODI).
/// 2014-05-07 Geovin Du (涂聚文)
/// </summary>
public static class OCRGetstring
{
/// <summary>
/// Builds the table of OCR languages offered by this helper.
/// </summary>
/// <returns>
/// A new <see cref="DataTable"/> with the columns <c>ID</c> (int), <c>LanguageName</c> (string)
/// and <c>LanguageLCID</c> (string), holding one row per language (21 rows).
/// The <c>LanguageLCID</c> values are the code strings that <c>GetLanuageType</c> switches on.
/// </returns>
public static DataTable getLanguageList()
{
    // Display name / language code pairs, in listing order. IDs are assigned 1..N below.
    string[,] languages =
    {
        { "简体中文", "2052" },
        { "繁体中文", "1028" },
        { "英语", "9" },
        { "捷克语", "5" },
        { "丹麦语", "6" },
        { "德语", "7" },
        { "希腊语", "8" },
        { "西班牙语", "10" },
        { "芬兰语", "11" },
        { "法语", "12" },
        { "匈牙利语", "14" },
        { "意大利语", "16" },
        { "日语", "17" },
        { "韩语", "18" },
        { "荷兰语", "19" },
        { "挪威语", "20" },
        { "波兰语", "21" },
        { "葡萄牙语", "22" },
        { "俄语", "25" },
        { "瑞典语", "29" },
        { "土耳其语", "31" }
    };

    DataTable table = new DataTable();
    table.Columns.Add("ID", typeof(int));
    table.Columns.Add("LanguageName", typeof(string));
    table.Columns.Add("LanguageLCID", typeof(string));

    int languageCount = languages.GetLength(0);
    for (int index = 0; index < languageCount; index++)
    {
        table.Rows.Add(index + 1, languages[index, 0], languages[index, 1]);
    }

    return table;
}
/// <summary>
/// Lookup from language code string to the matching MODI language constant.
/// </summary>
private static readonly Dictionary<string, MODI.MiLANGUAGES> LanguageByCode =
    new Dictionary<string, MODI.MiLANGUAGES>
    {
        { "2052", MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED },
        { "5", MODI.MiLANGUAGES.miLANG_CZECH },
        { "6", MODI.MiLANGUAGES.miLANG_DANISH },
        { "7", MODI.MiLANGUAGES.miLANG_GERMAN },
        { "8", MODI.MiLANGUAGES.miLANG_GREEK },
        { "9", MODI.MiLANGUAGES.miLANG_ENGLISH },
        { "10", MODI.MiLANGUAGES.miLANG_SPANISH },
        { "11", MODI.MiLANGUAGES.miLANG_FINNISH },
        { "12", MODI.MiLANGUAGES.miLANG_FRENCH },
        { "14", MODI.MiLANGUAGES.miLANG_HUNGARIAN },
        { "16", MODI.MiLANGUAGES.miLANG_ITALIAN },
        { "17", MODI.MiLANGUAGES.miLANG_JAPANESE },
        { "18", MODI.MiLANGUAGES.miLANG_KOREAN },
        { "19", MODI.MiLANGUAGES.miLANG_DUTCH },
        { "20", MODI.MiLANGUAGES.miLANG_NORWEGIAN },
        { "21", MODI.MiLANGUAGES.miLANG_POLISH },
        { "22", MODI.MiLANGUAGES.miLANG_PORTUGUESE },
        { "25", MODI.MiLANGUAGES.miLANG_RUSSIAN },
        { "29", MODI.MiLANGUAGES.miLANG_SWEDISH },
        { "31", MODI.MiLANGUAGES.miLANG_TURKISH },
        { "1028", MODI.MiLANGUAGES.miLANG_CHINESE_TRADITIONAL }
    };

/// <summary>
/// Translates a language code string into the MODI language constant used for OCR.
/// </summary>
/// <param name="sValue">Language code string, e.g. "2052" or "9"; matched exactly.</param>
/// <returns>
/// The matching <c>MODI.MiLANGUAGES</c> value, or <c>miLANG_ENGLISH</c> when the code is
/// null or not one of the known codes.
/// </returns>
private static MODI.MiLANGUAGES GetLanuageType(string sValue)
{
    MODI.MiLANGUAGES language;

    // The dictionary rejects a null key, so null takes the same fallback as an unknown code.
    if (sValue != null && LanguageByCode.TryGetValue(sValue, out language))
    {
        return language;
    }

    return MODI.MiLANGUAGES.miLANG_ENGLISH;
}
/// <summary>
/// Runs MODI OCR over an in-memory image and returns the recognized text.
/// </summary>
/// <remarks>
/// The image is drawn at the top-left corner of a canvas that is at least 1024x768 pixels,
/// saved as a temporary .bmp file, and that file is opened as a MODI document. Only the first
/// page of the document is read. Both temporary files are deleted before the method returns
/// or throws.
/// NOTE(review): the minimum canvas size is presumably there because MODI handles small
/// images poorly - confirm. The padding area is left unpainted, as in the original code.
/// </remarks>
/// <param name="image">Image to recognize.</param>
/// <param name="language">
/// Language code string as accepted by <c>GetLanuageType</c>; unknown codes fall back to English.
/// </param>
/// <returns>The text of the first page's layout; an empty string when that text is null.</returns>
/// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
public static string ExtractText(this System.Drawing.Image image, string language)
{
    if (image == null)
    {
        throw new ArgumentNullException("image");
    }

    // GetTempFileName creates an empty placeholder file; the bitmap goes to a sibling
    // path with a .bmp extension. Both are removed in the outer finally.
    string tmpFile = Path.GetTempFileName();
    string bmpFile = tmpFile + ".bmp";
    string text = null;
    try
    {
        using (var canvas = new Bitmap(Math.Max(image.Width, 1024), Math.Max(image.Height, 768)))
        {
            using (var gfx = Graphics.FromImage(canvas))
            {
                gfx.DrawImage(image, new Rectangle(0, 0, image.Width, image.Height));
            }
            canvas.Save(bmpFile, ImageFormat.Bmp);
        }

        MODI.Document doc = new MODI.Document();
        try
        {
            doc.Create(bmpFile);
            try
            {
                doc.OCR(GetLanuageType(language), true, true);
                var page = (MODI.Image)doc.Images[0];
                text = page.Layout.Text;
            }
            finally
            {
                // Close without saving (as getFileToOCR does) so the document is not left
                // open on the temp file - presumably what would block the delete below.
                doc.Close(false);
            }
        }
        finally
        {
            // Drop the COM reference now instead of waiting for the garbage collector.
            System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
        }
    }
    finally
    {
        File.Delete(tmpFile);
        File.Delete(bmpFile);
    }

    return text ?? string.Empty;
}
/// <summary>
/// Runs MODI OCR over an image file and returns the recognized text of all its pages.
/// </summary>
/// <param name="fileToOCR">Path of the file to recognize.</param>
/// <param name="language">
/// Language code string as accepted by <c>GetLanuageType</c>; unknown codes fall back to English.
/// </param>
/// <returns>
/// The layout text of every page, concatenated in page order with no separator;
/// an empty string when the file does not exist.
/// </returns>
public static string getFileToOCR(string fileToOCR, string language)
{
    if (!File.Exists(fileToOCR))
    {
        return string.Empty;
    }

    StringBuilder recognized = new StringBuilder();
    MODI.Document md = new MODI.Document();
    try
    {
        md.Create(fileToOCR);
        try
        {
            md.OCR(GetLanuageType(language), true, true);
            for (int i = 0; i < md.Images.Count; i++)
            {
                MODI.Image page = (MODI.Image)md.Images[i];
                recognized.Append(page.Layout.Text);
            }
        }
        finally
        {
            // Close without saving even when OCR or page access throws,
            // so the document is not left open on the source file.
            md.Close(false);
        }
    }
    finally
    {
        // Drop the COM reference now instead of waiting for the garbage collector.
        System.Runtime.InteropServices.Marshal.ReleaseComObject(md);
    }

    return recognized.ToString();
}
}
}
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Runtime.InteropServices;
namespace ToText
{
/// <summary>
/// geovindu
/// </summary>
public partial class Form1 : Form
{
/// <summary>
/// Creates the form and calls InitializeComponent (not shown in this file;
/// presumably the designer-generated control setup).
/// </summary>
public Form1()
{
InitializeComponent();
}
#region DllImport
// P/Invoke bindings to AspriseOCR.dll. All four use the cdecl calling convention and
// return a native pointer; button2_Click reads the OCRpart result as an ANSI string via
// Marshal.PtrToStringAnsi.
// NOTE(review): the meaning of "type" is defined by the native library - this file only
// ever passes -1. Ownership of the returned pointer is not shown here either; confirm
// against the AspriseOCR documentation.

/// <summary>Binds to the "OCR" export of AspriseOCR.dll.</summary>
/// <param name="file">File path handed to the native function.</param>
/// <param name="type">Native "type" argument (semantics not shown in this file).</param>
[DllImport("AspriseOCR.dll", EntryPoint = "OCR", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr OCR(string file, int type);
/// <summary>Binds to the "OCRpart" export of AspriseOCR.dll; takes a rectangle given by its start point and size.</summary>
/// <param name="file">File path handed to the native function.</param>
/// <param name="type">Native "type" argument (semantics not shown in this file).</param>
/// <param name="startX">X coordinate of the region start.</param>
/// <param name="startY">Y coordinate of the region start.</param>
/// <param name="width">Region width.</param>
/// <param name="height">Region height.</param>
[DllImport("AspriseOCR.dll", EntryPoint = "OCRpart", CallingConvention = CallingConvention.Cdecl)]
static extern IntPtr OCRpart(string file, int type, int startX, int startY, int width, int height);
/// <summary>Binds to the "OCRBarCodes" export of AspriseOCR.dll. Not called in this file.</summary>
[DllImport("AspriseOCR.dll", EntryPoint = "OCRBarCodes", CallingConvention = CallingConvention.Cdecl)]
static extern IntPtr OCRBarCodes(string file, int type);
/// <summary>Binds to the "OCRpartBarCodes" export of AspriseOCR.dll. Not called in this file.</summary>
[DllImport("AspriseOCR.dll", EntryPoint = "OCRpartBarCodes", CallingConvention = CallingConvention.Cdecl)]
static extern IntPtr OCRpartBarCodes(string file, int type, int startX, int startY, int width, int height);
#endregion
#region 转换按钮事件
/// <summary>
/// Click handler for the convert button: runs AspriseOCR over the image whose path is in
/// <c>txt_imgpath</c> and puts the recognized text into <c>txt_result</c>.
/// </summary>
/// <remarks>
/// The image is opened only to measure it, and is disposed immediately so the file is not
/// kept locked. If it cannot be opened, the error message is shown and the native call is
/// still made with width and height of -1, as in the original code.
/// NOTE(review): confirm how AspriseOCR treats -1 dimensions; if it does not mean
/// "whole image", this handler should return after showing the error instead.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    string imagePath = txt_imgpath.Text;
    if (String.IsNullOrEmpty(imagePath))
    {
        MessageBox.Show("请先选择图片!");
        return;
    }

    // Region passed to the native call: origin (0, 0) and the image's full size.
    int width = -1;
    int height = -1;
    try
    {
        using (Image img = Image.FromFile(imagePath))
        {
            width = img.Width;
            height = img.Height;
        }
    }
    catch (Exception ex)
    {
        // Show the cause; a stack trace tells the user nothing actionable.
        MessageBox.Show(ex.Message);
    }

    txt_result.Text = Marshal.PtrToStringAnsi(OCRpart(imagePath, -1, 0, 0, width, height));
}
#endregion
#region 浏览事件
/// <summary>
/// Click handler for the browse button: lets the user pick an image file.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btn_imgpath_Click(object sender, EventArgs e)
{
    BrowseForImage();
}

/// <summary>
/// Click handler for the path text box: lets the user pick an image file.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void txt_imgpath_Click(object sender, EventArgs e)
{
    BrowseForImage();
}

/// <summary>
/// Shows the open-file dialog and copies the chosen path into <c>txt_imgpath</c>.
/// The text box is left untouched when the dialog is dismissed without confirming,
/// so cancelling no longer overwrites the current path with the dialog's FileName.
/// </summary>
private void BrowseForImage()
{
    if (openFileDialog1.ShowDialog() == DialogResult.OK)
    {
        txt_imgpath.Text = openFileDialog1.FileName;
    }
}
#endregion
/// <summary>
/// Handler with the signature of a form Load event; the body is empty, so it does nothing.
/// NOTE(review): the event wiring is not visible in this file - presumably done in the
/// designer file; confirm before removing.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_Load(object sender, EventArgs e)
{
}
}
}
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)
浙公网安备 33010602011771号