文件结构
-App.ico
-AssemblyInfo.cs
+Backup
-App.ico
-AssemblyInfo.cs
+Doc
-BitUtils.cs
-FileOffset.cs
-FileOffsetCollection.cs
-PieceDescriptor.cs
-PieceDescriptorCollection.cs
-TextLoader.cs
-GetDocText.csproj
-GetDocText.sln
-MainForm.cs
-MainForm.resx
+Native
-Enums.cs
-Guids.cs
-IEnumSTATSTG.cs
-IStorage.cs
-NativeMethods.cs
-Structs.cs
+Ole
-OleStorage.cs
-OleStream.cs
+Backup1
-App.ico
-AssemblyInfo.cs
+Doc
-BitUtils.cs
-FileOffset.cs
-FileOffsetCollection.cs
-PieceDescriptor.cs
-PieceDescriptorCollection.cs
-TextLoader.cs
-GetDocText.csproj
-GetDocText.sln
-MainForm.cs
-MainForm.resx
+Native
-Enums.cs
-Guids.cs
-IEnumSTATSTG.cs
-IStorage.cs
-NativeMethods.cs
-Structs.cs
+Ole
-OleStorage.cs
-OleStream.cs
+bin
+Debug
-GetDocText.exe
-GetDocText.pdb
-GetDocText.vshost.exe
+Doc
-BitUtils.cs
-FileOffset.cs
-FileOffsetCollection.cs
-PieceDescriptor.cs
-PieceDescriptorCollection.cs
-TextLoader.cs
-GetDocText.csproj
-GetDocText.sln
-MainForm.cs
-MainForm.resx
+Native
-Enums.cs
-Guids.cs
-IEnumSTATSTG.cs
-IStorage.cs
-NativeMethods.cs
-Structs.cs
+obj
+Debug
-GetDocText.csproj.GenerateResource.Cache
-GetDocText.exe
-GetDocText.MainForm.resources
-GetDocText.pdb
+Refactor
+TempPE
-GetDocText.csproj.FileListAbsolute.txt
+Ole
-OleStorage.cs
-OleStream.cs
-UpgradeLog.XML
-UpgradeLog2.XML
+_UpgradeReport_Files
-UpgradeReport.css
-UpgradeReport.xslt
-UpgradeReport_Minus.gif
-UpgradeReport_Plus.gif
#region Copyright (c) 2006-2008 Cellbi
/*
Cellbi Software Component Product
Copyright (c) 2006-2008 Cellbi
www.cellbi.com
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. The names of the authors may not be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CELLBI
OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#endregion
using System;
using System.IO;
using System.Windows.Forms;
using GetDocText.Doc;
namespace GetDocText
{
/// <summary>
/// Main window of the application. Lets the user pick a *.doc file and shows
/// the text extracted from it by <see cref="TextLoader"/>.
/// </summary>
public class MainForm : System.Windows.Forms.Form
{
    // private fields ...
    private System.Windows.Forms.TextBox edPath;
    private System.Windows.Forms.Label lblPath;
    private System.Windows.Forms.Button btnOpen;
    private System.Windows.Forms.RichTextBox rtbText;
    private System.Windows.Forms.OpenFileDialog dlgOpenFile;
    private System.ComponentModel.Container components = null;

    // constructors ...
    #region MainForm
    /// <summary>
    /// Creates the form and builds its child controls.
    /// </summary>
    public MainForm()
    {
        InitializeComponent();
    }
    #endregion

    #region Dispose
    /// <summary>
    /// Releases the resources held by the form.
    /// </summary>
    /// <param name="disposing">True when called from Dispose(), false when called from the finalizer</param>
    protected override void Dispose( bool disposing )
    {
        if( disposing )
        {
            if (components != null)
            {
                components.Dispose();
            }
            // The dialog is created in InitializeComponent but is never added to
            // 'components' (which stays null), so it has to be disposed explicitly.
            if (dlgOpenFile != null)
            {
                dlgOpenFile.Dispose();
            }
        }
        base.Dispose( disposing );
    }
    #endregion

    #region Windows Form Designer generated code
    /// <summary>
    /// Required method for Designer support - do not modify
    /// the contents of this method with the code editor.
    /// </summary>
    private void InitializeComponent()
    {
        this.edPath = new System.Windows.Forms.TextBox();
        this.lblPath = new System.Windows.Forms.Label();
        this.btnOpen = new System.Windows.Forms.Button();
        this.rtbText = new System.Windows.Forms.RichTextBox();
        this.dlgOpenFile = new System.Windows.Forms.OpenFileDialog();
        this.SuspendLayout();
        //
        // edPath
        //
        this.edPath.Anchor =
            ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top
            | System.Windows.Forms.AnchorStyles.Left)
            | System.Windows.Forms.AnchorStyles.Right)));
        this.edPath.Location = new System.Drawing.Point(96, 17);
        this.edPath.Name = "edPath";
        this.edPath.Size = new System.Drawing.Size(280, 21);
        this.edPath.TabIndex = 0;
        //
        // lblPath
        //
        this.lblPath.AutoSize = true;
        this.lblPath.Location = new System.Drawing.Point(10, 17);
        this.lblPath.Name = "lblPath";
        this.lblPath.Size = new System.Drawing.Size(71, 12);
        this.lblPath.TabIndex = 1;
        this.lblPath.Text = "File path :";
        //
        // btnOpen
        //
        this.btnOpen.Anchor =
            ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top
            | System.Windows.Forms.AnchorStyles.Right)));
        this.btnOpen.FlatStyle = System.Windows.Forms.FlatStyle.System;
        this.btnOpen.Location = new System.Drawing.Point(386, 16);
        this.btnOpen.Name = "btnOpen";
        this.btnOpen.Size = new System.Drawing.Size(77, 25);
        this.btnOpen.TabIndex = 2;
        this.btnOpen.Text = "Open";
        this.btnOpen.Click += new System.EventHandler(this.btnOpen_Click);
        //
        // rtbText
        //
        this.rtbText.Anchor =
            ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top
            | System.Windows.Forms.AnchorStyles.Bottom)
            | System.Windows.Forms.AnchorStyles.Left)
            | System.Windows.Forms.AnchorStyles.Right)));
        this.rtbText.Location = new System.Drawing.Point(10, 52);
        this.rtbText.Name = "rtbText";
        this.rtbText.Size = new System.Drawing.Size(453, 229);
        this.rtbText.TabIndex = 3;
        this.rtbText.Text = "";
        //
        // dlgOpenFile
        //
        this.dlgOpenFile.DefaultExt = "doc";
        this.dlgOpenFile.Filter = "Doc files|*.doc";
        this.dlgOpenFile.Title = "Open *.doc file";
        //
        // MainForm
        //
        this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
        this.ClientSize = new System.Drawing.Size(472, 296);
        this.Controls.Add(this.rtbText);
        this.Controls.Add(this.btnOpen);
        this.Controls.Add(this.lblPath);
        this.Controls.Add(this.edPath);
        this.MinimumSize = new System.Drawing.Size(480, 323);
        this.Name = "MainForm";
        this.Text = "View text";
        this.ResumeLayout(false);
        this.PerformLayout();
    }
    #endregion

    /// <summary>
    /// Application entry point: shows the main form.
    /// </summary>
    [STAThread]
    static void Main()
    {
        Application.EnableVisualStyles();
        Application.Run(new MainForm());
    }

    // private methods ...
    /// <summary>
    /// Tells the user that the text of the given file could not be loaded.
    /// </summary>
    /// <param name="path">The path of the file that failed to load</param>
    /// <param name="reason">Human readable description of the failure</param>
    private void ShowLoadError(string path, string reason)
    {
        MessageBox.Show(
            this,
            "Cannot load text from \"" + path + "\".\n" + reason,
            this.Text,
            MessageBoxButtons.OK,
            MessageBoxIcon.Warning);
    }

    // event handlers ...
    /// <summary>
    /// Asks the user for a file, extracts its text and shows it in the form.
    /// On failure the path and text boxes are left empty and the user is told why.
    /// </summary>
    private void btnOpen_Click(object sender, System.EventArgs e)
    {
        DialogResult dialogResult = dlgOpenFile.ShowDialog();
        if (dialogResult != DialogResult.OK && dialogResult != DialogResult.Yes)
            return;

        string path = dlgOpenFile.FileName;
        edPath.Clear();
        rtbText.Clear();

        string text;
        bool loaded;
        try
        {
            TextLoader loader = new TextLoader(path);
            loaded = loader.LoadText(out text);
        }
        catch (Exception ex)
        {
            // UI boundary: the file content is arbitrary user data, and a damaged
            // file can make the parser throw (for example EndOfStreamException).
            // Report the failure instead of letting it escape the event handler.
            ShowLoadError(path, ex.Message);
            return;
        }

        if (!loaded)
        {
            ShowLoadError(path, "The file could not be read as a document.");
            return;
        }

        edPath.Text = path;
        rtbText.Text = text;
    }
}
}
#region Copyright (c) 2006-2008 Cellbi
/*
Cellbi Software Component Product
Copyright (c) 2006-2008 Cellbi
www.cellbi.com
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. The names of the authors may not be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CELLBI
OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#endregion
using System;
using System.IO;
using GetDocText.Native;
using GetDocText.Ole;
namespace GetDocText.Doc
{
/// <summary>
/// Implements loading of the text from the doc files.
/// The file is opened as an OLE storage; the "WordDocument" stream supplies the
/// header values and the text bytes, and a table stream ("0Table" or "1Table")
/// supplies the piece descriptors that say where each run of text is stored.
/// </summary>
public class TextLoader
{
    // Offset in the "WordDocument" stream of the 16-bit flags word that selects the table stream.
    const int FibFlagsOffset = 10;
    // Bit of the flags word: set selects "1Table", clear selects "0Table".
    // NOTE(review): presumably FibBase.fWhichTblStm of the Word binary format - confirm against the spec.
    const byte WhichTableStreamBit = 9;
    // Offset in the "WordDocument" stream of the piece table location:
    // a 32-bit offset followed by a 32-bit length, both relative to the table stream.
    const int FibPieceTableInfoOffset = 418;
    // Smallest "WordDocument" stream that still contains both values above.
    const int FibMinimumLength = FibPieceTableInfoOffset + 8;

    readonly string _Path;

    // constructors ...
    /// <summary>
    /// Creates new instance of the TextLoader.
    /// </summary>
    /// <param name="path">The path of the file to load the text</param>
    public TextLoader(string path)
    {
        _Path = path;
    }

    // private methods ...
    /// <summary>
    /// Copies the whole content of the stream into memory and wraps it in a reader.
    /// Returns null when no stream is given.
    /// </summary>
    BinaryReader GetReader(OleStream stream)
    {
        if (stream == null)
            return null;
        // NOTE(review): the OleStream itself is not released here; its API is not
        // visible from this class - confirm whether it needs an explicit close.
        byte[] streamData = stream.ReadToEnd();
        return new BinaryReader(new MemoryStream(streamData));
    }

    /// <summary>
    /// Opens the named stream of the storage for reading; null when it cannot be opened.
    /// </summary>
    BinaryReader GetStreamReader(OleStorage storage, string streamName)
    {
        // GetReader already maps a null stream to a null reader.
        return GetReader(storage.OpenStream(streamName));
    }

    /// <summary>
    /// Opens the "WordDocument" stream of the storage.
    /// </summary>
    BinaryReader GetDocumentStreamReader(OleStorage storage)
    {
        return GetStreamReader(storage, "WordDocument");
    }

    /// <summary>
    /// Opens the table stream with the given name.
    /// </summary>
    BinaryReader GetTableStreamReader(OleStorage storage, string tableName)
    {
        return GetStreamReader(storage, tableName);
    }

    /// <summary>
    /// Reads from the document header the name of the table stream and the
    /// location of the piece descriptors inside that table stream.
    /// </summary>
    /// <param name="reader">Reader over the "WordDocument" stream</param>
    /// <param name="tableName">Receives "1Table" or "0Table"</param>
    /// <param name="pdcOffset">Receives the offset of the piece descriptors in the table stream</param>
    /// <param name="pdcLength">Receives the length of the piece descriptors data</param>
    void GetDataFromFib(BinaryReader reader, out string tableName, out int pdcOffset, out uint pdcLength)
    {
        reader.BaseStream.Seek(FibFlagsOffset, SeekOrigin.Begin);
        ushort flags = reader.ReadUInt16();
        tableName = BitUtils.IsSet(flags, WhichTableStreamBit) ? "1Table" : "0Table";

        reader.BaseStream.Seek(FibPieceTableInfoOffset, SeekOrigin.Begin);
        pdcOffset = reader.ReadInt32();
        pdcLength = reader.ReadUInt32();
    }

    /// <summary>
    /// Reads the piece descriptors stored at the given location of the table stream.
    /// </summary>
    PieceDescriptorCollection GetPieceDescriptors(BinaryReader reader, int offset, uint length)
    {
        PieceDescriptorCollection result = new PieceDescriptorCollection(offset, length);
        result.Read(reader);
        return result;
    }

    /// <summary>
    /// Reads a run of text from the current position of the reader.
    /// </summary>
    /// <param name="reader">The reader positioned at the first byte of the text</param>
    /// <param name="length">The length of the text in bytes</param>
    /// <param name="isUnicode">True for 2-byte characters, false for 1-byte characters</param>
    /// <returns>The decoded text; an empty string when length is 0</returns>
    string ReadString(BinaryReader reader, uint length, bool isUnicode)
    {
        if (length == 0)
            return string.Empty;

        uint charCount = isUnicode ? length / 2 : length;
        // A builder keeps the work linear; appending to a string copies it every time.
        System.Text.StringBuilder result = new System.Text.StringBuilder();
        for (uint i = 0; i < charCount; i++)
        {
            if (isUnicode)
            {
                // Read as unsigned so the conversion to char never sees a negative value.
                result.Append((char)reader.ReadUInt16());
            }
            else
            {
                // Each byte maps to the character with the same code.
                result.Append((char)reader.ReadByte());
            }
        }
        return result.ToString();
    }

    /// <summary>
    /// Extracts the text from an already opened storage.
    /// </summary>
    /// <param name="storage">The opened document storage, may be null</param>
    /// <param name="text">Receives the text, or an empty string on failure</param>
    /// <returns>True when the text was extracted, false when a required stream is missing or malformed</returns>
    bool LoadText(OleStorage storage, out string text)
    {
        text = string.Empty;
        if (storage == null)
            return false;

        using (BinaryReader documentReader = GetDocumentStreamReader(storage))
        {
            if (documentReader == null)
                return false;
            // Too short to hold the header values read by GetDataFromFib.
            if (documentReader.BaseStream.Length < FibMinimumLength)
                return false;

            int pdcOffset;
            uint pdcLength;
            string tableName;
            GetDataFromFib(documentReader, out tableName, out pdcOffset, out pdcLength);

            PieceDescriptorCollection pieces;
            using (BinaryReader tableReader = GetTableStreamReader(storage, tableName))
            {
                if (tableReader == null)
                    return false;
                pieces = GetPieceDescriptors(tableReader, pdcOffset, pdcLength);
            }
            if (pieces == null)
                return false;

            System.Text.StringBuilder builder = new System.Text.StringBuilder();
            long documentLength = documentReader.BaseStream.Length;
            int count = pieces.Count;
            for (int i = 0; i < count; i++)
            {
                uint pieceStart;
                uint pieceEnd;
                bool isUnicode = pieces.GetPieceFileBounds(i, out pieceStart, out pieceEnd);
                // Inverted bounds would wrap the unsigned length below, and bounds past
                // the end of the stream cannot be read: treat both as a malformed file.
                if (pieceEnd < pieceStart || pieceEnd > documentLength)
                    return false;
                documentReader.BaseStream.Seek(pieceStart, SeekOrigin.Begin);
                builder.Append(ReadString(documentReader, pieceEnd - pieceStart, isUnicode));
            }
            text = builder.ToString();
            return true;
        }
    }

    // public methods ...
    /// <summary>
    /// Loads text from the file.
    /// </summary>
    /// <param name="text">The text of the file, or an empty string when loading fails</param>
    /// <returns>True when the text was loaded; false when the file is not a storage file,
    /// cannot be opened, or does not contain the expected streams</returns>
    public bool LoadText(out string text)
    {
        text = string.Empty;
        // Any non-zero result is treated as "not a storage file".
        if (NativeMethods.StgIsStorageFile(_Path) != 0)
            return false;

        OleStorage storage = OleStorage.CreateInstance(_Path);
        // Without this check the finally block below would dereference null.
        if (storage == null)
            return false;
        try
        {
            return LoadText(storage, out text);
        }
        finally
        {
            storage.Close();
        }
    }
}
}
#region Copyright (c) 2006-2008 Cellbi
/*
Cellbi Software Component Product
Copyright (c) 2006-2008 Cellbi
www.cellbi.com
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. The names of the authors may not be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CELLBI
OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#endregion
using System;
namespace GetDocText.Doc
{
/// <summary>
/// Helper methods for testing individual bits of integer values.
/// </summary>
public class BitUtils
{
    // Number of bits in the ushort values handled by IsSet.
    const int BitsInUInt16 = 16;

    // constructors ...
    /// <summary>
    /// Only static members are provided; instances are not meant to be created directly.
    /// </summary>
    protected BitUtils()
    {
    }

    // public methods ...
    /// <summary>
    /// Defines if specified bit is set.
    /// </summary>
    /// <param name="target">The target to check bit from</param>
    /// <param name="bit">The zero-based index of the bit to check (0 is the least significant bit)</param>
    /// <returns>True when the bit is set; false when it is clear or when
    /// <paramref name="bit"/> is outside the 16 bits of the target</returns>
    public static bool IsSet(ushort target, byte bit)
    {
        // The shift count of an int shift is taken modulo 32, so without this guard
        // bit 32 would silently test bit 0, bit 33 would test bit 1, and so on.
        if (bit >= BitsInUInt16)
            return false;
        return (target & (1 << bit)) != 0;
    }
}
}
#region Copyright (c) 2006-2008 Cellbi
/*
Cellbi Software Component Product
Copyright (c) 2006-2008 Cellbi
www.cellbi.com
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. The names of the authors may not be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CELLBI
OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#endregion
using System;
using System.Collections;
using System.IO;
namespace GetDocText.Doc
{
/// <summary>
/// Information about complex document part: the piece descriptors read from a
/// table stream together with the offsets that delimit each piece.
/// </summary>
internal class PieceDescriptorCollection
{
    // Size in bytes of one entry of the offsets array stored in front of the descriptors.
    const int PointerSize = 4;

    int _Offset;
    uint _Length;
    // Both stay null until Read finds a block of type 2.
    PieceDescriptor[] _Descriptors;
    FileOffsetCollection _DescriptorOffsets;

    // constructors ...
    /// <summary>
    /// Creates new instance of the PieceDescriptorCollection.
    /// </summary>
    /// <param name="offset">The offset of current structure in the stream</param>
    /// <param name="length">The length of current structure</param>
    public PieceDescriptorCollection(int offset, uint length)
    {
        _Offset = offset;
        _Length = length;
    }

    // private methods ...
    /// <summary>
    /// Number of offsets stored in a table of the given size: one more than the
    /// number of descriptors.
    /// </summary>
    int GetOffsetsCount(int size, int structureSize)
    {
        return GetDescriptorsCount(size, structureSize) + 1;
    }

    /// <summary>
    /// Number of descriptors stored in a table of the given size. The table holds
    /// n + 1 offsets of PointerSize bytes followed by n descriptors of structureSize bytes.
    /// </summary>
    int GetDescriptorsCount(int size, int structureSize)
    {
        return (size - PointerSize) / (structureSize + PointerSize);
    }

    /// <summary>
    /// Reads the given number of consecutive descriptors from the reader.
    /// A non-positive count yields an empty array.
    /// </summary>
    PieceDescriptor[] ReadDescriptors(BinaryReader reader, int count)
    {
        PieceDescriptor[] result = new PieceDescriptor[count > 0 ? count : 0];
        for (int i = 0; i < result.Length; i++)
        {
            PieceDescriptor descriptor = new PieceDescriptor();
            descriptor.Read(reader);
            result[i] = descriptor;
        }
        return result;
    }

    // public methods ...
    /// <summary>
    /// Reads data using given reader. Scans the region given to the constructor as
    /// a sequence of blocks, each introduced by a one-byte type, and keeps the
    /// offsets and descriptors of the block of type 2.
    /// </summary>
    /// <param name="reader">The binary reader to use.</param>
    /// <exception cref="ArgumentNullException">reader is null</exception>
    public void Read(BinaryReader reader)
    {
        if (reader == null)
            throw new ArgumentNullException("reader");

        reader.BaseStream.Seek(_Offset, SeekOrigin.Begin);
        // Computed as long so that a large length cannot overflow the sum.
        long end = (long)_Offset + _Length;
        while (reader.BaseStream.Position < end)
        {
            byte byteType = reader.ReadByte();
            switch (byteType)
            {
                case 0:
                    // Skip the single byte that follows this type.
                    reader.ReadByte();
                    break;
                case 1:
                    // A 16-bit size followed by that many bytes, which are not needed here.
                    // NOTE(review): presumably the property modifier (grpprl) block of the
                    // Word binary format - confirm against the spec.
                    short cbGrpprl = reader.ReadInt16();
                    reader.ReadBytes(cbGrpprl);
                    break;
                case 2:
                    // The piece table: a 32-bit size, then the offsets, then the descriptors.
                    int tableLen = reader.ReadInt32();
                    _DescriptorOffsets = new FileOffsetCollection();
                    _DescriptorOffsets.Read(reader, GetOffsetsCount(tableLen, PieceDescriptor.Size));
                    _Descriptors = ReadDescriptors(reader, GetDescriptorsCount(tableLen, PieceDescriptor.Size));
                    break;
                // Any other type byte is skipped and scanning continues with the next byte.
            }
        }
    }

    /// <summary>
    /// Gets bounds of the specified piece in the file.
    /// </summary>
    /// <param name="piece">The number of the piece</param>
    /// <param name="start">The start file offset</param>
    /// <param name="end">The end file offset</param>
    /// <returns>True when the piece is stored as Unicode text</returns>
    /// <exception cref="InvalidOperationException">No piece table has been read</exception>
    /// <exception cref="ArgumentOutOfRangeException">piece is not a valid piece number</exception>
    public bool GetPieceFileBounds(int piece, out uint start, out uint end)
    {
        if (_Descriptors == null || _DescriptorOffsets == null)
            throw new InvalidOperationException("The piece table has not been read.");
        if (piece < 0 || piece >= _Descriptors.Length)
            throw new ArgumentOutOfRangeException("piece");

        PieceDescriptor pd = _Descriptors[piece];
        uint fc = pd.Fc;
        bool isUnicode = FileOffset.IsUnicode(fc);
        start = FileOffset.NormalizeFc(fc);
        // Consecutive offsets delimit the piece; their difference is scaled by the
        // per-encoding delta to get the end position in the file.
        uint length = _DescriptorOffsets[piece + 1].Value - _DescriptorOffsets[piece].Value;
        end = (uint)(start + length * FileOffset.GetFcDelta(isUnicode));
        return isUnicode;
    }

    // public properties ...
    /// <summary>
    /// The count of piece descriptors in the collection; 0 when no piece table
    /// has been read.
    /// </summary>
    public int Count
    {
        get
        {
            return _Descriptors == null ? 0 : _Descriptors.Length;
        }
    }
}
}