diff options
| author | Victoria Plitt <Victoria.Plitt@twine-s.com> | 2019-04-08 13:49:55 +0300 |
|---|---|---|
| committer | Victoria Plitt <Victoria.Plitt@twine-s.com> | 2019-04-08 13:49:55 +0300 |
| commit | fc8a05358a92cc3c77c5f1e30d536807ef0614fd (patch) | |
| tree | c65f696ebd60f3790145721307c255e5a339923f /Software/Visual_Studio/Scripting/Tango.Scripting.Editors/Utils/FileReader.cs | |
| parent | b4a71931ea52636c6b36376aa9d71697ccf73524 (diff) | |
| download | Tango-fc8a05358a92cc3c77c5f1e30d536807ef0614fd.tar.gz Tango-fc8a05358a92cc3c77c5f1e30d536807ef0614fd.zip | |
were added scripting projects
Diffstat (limited to 'Software/Visual_Studio/Scripting/Tango.Scripting.Editors/Utils/FileReader.cs')
| -rw-r--r-- | Software/Visual_Studio/Scripting/Tango.Scripting.Editors/Utils/FileReader.cs | 208 |
1 files changed, 208 insertions, 0 deletions
// Copyright (c) AlphaSierraPapa for the SharpDevelop Team (for details please see \doc\copyright.txt)
// This code is distributed under the GNU LGPL (for details please see \doc\license.txt)

using System;
using System.IO;
using System.Text;

namespace Tango.Scripting.Editors.Utils
{
    /// <summary>
    /// Class that can open text files with auto-detection of the encoding.
    /// </summary>
    /// <remarks>
    /// Files starting with a byte order mark are handed to <see cref="StreamReader"/>'s own
    /// detection. Files without one are scanned (up to 500 KB) to decide whether their bytes
    /// form valid UTF-8; if they do not, the caller-supplied default encoding is used.
    /// </remarks>
    public static class FileReader
    {
        /// <summary>
        /// Gets if the given encoding is a Unicode encoding (UTF).
        /// </summary>
        /// <param name="encoding">The encoding to classify.</param>
        /// <returns>
        /// True for UTF-7, UTF-8, UTF-16 LE, UTF-16 BE, UTF-32 LE and UTF-32 BE;
        /// false for all other encodings.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
        public static bool IsUnicode(Encoding encoding)
        {
            if (encoding == null)
                throw new ArgumentNullException("encoding");
            switch (encoding.CodePage) {
                case 65000: // UTF-7
                case 65001: // UTF-8
                case 1200:  // UTF-16 LE
                case 1201:  // UTF-16 BE
                case 12000: // UTF-32 LE
                case 12001: // UTF-32 BE
                    return true;
                default:
                    return false;
            }
        }

        /// <summary>
        /// Reads the content of the given stream.
        /// </summary>
        /// <param name="stream">The stream to read.
        /// The stream must support seeking and must be positioned at its beginning.
        /// The reader created over it is disposed before returning, which also closes the stream.</param>
        /// <param name="defaultEncoding">The encoding to use if the encoding cannot be auto-detected.</param>
        /// <returns>The file content as string.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="stream"/> or <paramref name="defaultEncoding"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException"><paramref name="stream"/> is not positioned at its beginning.</exception>
        public static string ReadFileContent(Stream stream, Encoding defaultEncoding)
        {
            using (StreamReader reader = OpenStream(stream, defaultEncoding)) {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Reads the content of the file.
        /// </summary>
        /// <param name="fileName">The file name.</param>
        /// <param name="defaultEncoding">The encoding to use if the encoding cannot be auto-detected.</param>
        /// <returns>The file content as string.</returns>
        public static string ReadFileContent(string fileName, Encoding defaultEncoding)
        {
            using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read)) {
                return ReadFileContent(fs, defaultEncoding);
            }
        }

        /// <summary>
        /// Opens the specified file for reading.
        /// </summary>
        /// <param name="fileName">The file to open.</param>
        /// <param name="defaultEncoding">The encoding to use if the encoding cannot be auto-detected.</param>
        /// <returns>Returns a StreamReader that reads from the stream. Use
        /// <see cref="StreamReader.CurrentEncoding"/> to get the encoding that was used.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="fileName"/> or <paramref name="defaultEncoding"/> is null.
        /// </exception>
        public static StreamReader OpenFile(string fileName, Encoding defaultEncoding)
        {
            if (fileName == null)
                throw new ArgumentNullException("fileName");
            FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
            try {
                return OpenStream(fs, defaultEncoding);
                // don't use finally: the stream must be kept open until the StreamReader closes it
            } catch {
                // no reader took ownership of the stream, so release the file handle here
                fs.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Opens the specified stream for reading.
        /// </summary>
        /// <param name="stream">The stream to open.
        /// It must support seeking (its length is queried and its position is reset)
        /// and must be positioned at its beginning.</param>
        /// <param name="defaultEncoding">The encoding to use if the encoding cannot be auto-detected.</param>
        /// <returns>Returns a StreamReader that reads from the stream. Use
        /// <see cref="StreamReader.CurrentEncoding"/> to get the encoding that was used.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="stream"/> or <paramref name="defaultEncoding"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException"><paramref name="stream"/> is not positioned at its beginning.</exception>
        public static StreamReader OpenStream(Stream stream, Encoding defaultEncoding)
        {
            if (stream == null)
                throw new ArgumentNullException("stream");
            if (stream.Position != 0)
                throw new ArgumentException("stream is not positioned at beginning.", "stream");
            if (defaultEncoding == null)
                throw new ArgumentNullException("defaultEncoding");

            if (stream.Length < 2) {
                // Too short to inspect two leading bytes; just use the caller's encoding.
                return new StreamReader(stream, defaultEncoding);
            }

            // the autodetection of StreamReader is not capable of detecting the difference
            // between ISO-8859-1 and UTF-8 without BOM.
            int firstByte = stream.ReadByte();
            int secondByte = stream.ReadByte();
            switch ((firstByte << 8) | secondByte) {
                case 0x0000: // either UTF-32 Big Endian or a binary file; use StreamReader
                case 0xfffe: // Unicode BOM (UTF-16 LE or UTF-32 LE)
                case 0xfeff: // UTF-16 BE BOM
                case 0xefbb: // start of UTF-8 BOM
                    // StreamReader autodetection works
                    stream.Position = 0;
                    return new StreamReader(stream);
                default:
                    return AutoDetect(stream, (byte)firstByte, (byte)secondByte, defaultEncoding);
            }
        }

        /// <summary>
        /// Decides between UTF-8 and <paramref name="defaultEncoding"/> for a stream without a
        /// byte order mark by checking whether its leading bytes form valid UTF-8 sequences.
        /// </summary>
        /// <param name="fs">The stream, positioned just after its first two bytes.
        /// Its position is reset to 0 before the reader is created.</param>
        /// <param name="firstByte">The first byte of the stream (already consumed by the caller).</param>
        /// <param name="secondByte">The second byte of the stream (already consumed by the caller).</param>
        /// <param name="defaultEncoding">The encoding to use when the content is pure ASCII or not valid UTF-8.</param>
        /// <returns>A StreamReader over <paramref name="fs"/> using the chosen encoding.</returns>
        static StreamReader AutoDetect(Stream fs, byte firstByte, byte secondByte, Encoding defaultEncoding)
        {
            int max = (int)Math.Min(fs.Length, 500000); // look at max. 500 KB
            const int ASCII = 0;        // only bytes below 0x80 seen so far
            const int Error = 1;        // a byte sequence that is invalid in UTF-8 was found
            const int UTF8 = 2;         // at least one complete multi-byte sequence seen, none open
            const int UTF8Sequence = 3; // inside a multi-byte sequence, continuation bytes pending
            int state = ASCII;
            int sequenceLength = 0;     // continuation bytes still expected in the open sequence
            for (int i = 0; i < max; i++) {
                byte b;
                if (i == 0) {
                    b = firstByte;
                } else if (i == 1) {
                    b = secondByte;
                } else {
                    b = (byte)fs.ReadByte();
                }
                if (b < 0x80) {
                    // normal ASCII character
                    if (state == UTF8Sequence) {
                        state = Error;
                        break;
                    }
                } else if (b < 0xc0) {
                    // 10xxxxxx : continues UTF8 byte sequence
                    if (state == UTF8Sequence) {
                        --sequenceLength;
                        if (sequenceLength < 0) {
                            state = Error;
                            break;
                        } else if (sequenceLength == 0) {
                            state = UTF8;
                        }
                    } else {
                        state = Error;
                        break;
                    }
                } else if (b >= 0xc2 && b < 0xf5) {
                    // beginning of byte sequence
                    if (state == UTF8 || state == ASCII) {
                        state = UTF8Sequence;
                        if (b < 0xe0) {
                            sequenceLength = 1; // one more byte following
                        } else if (b < 0xf0) {
                            sequenceLength = 2; // two more bytes following
                        } else {
                            sequenceLength = 3; // three more bytes following
                        }
                    } else {
                        state = Error;
                        break;
                    }
                } else {
                    // 0xc0, 0xc1, 0xf5 to 0xff are invalid in UTF-8 (see RFC 3629)
                    state = Error;
                    break;
                }
            }
            if (state == UTF8Sequence && max == fs.Length) {
                // The whole file was scanned and it ends in the middle of a multi-byte sequence,
                // so it is not valid UTF-8 (typical for a single-byte-encoded file whose last
                // character is non-ASCII). When the scan merely stopped at the 500 KB limit the
                // open sequence may just have been cut by the limit, so that case is tolerated.
                state = Error;
            }
            fs.Position = 0;
            switch (state) {
                case ASCII:
                case Error:
                    // when the file seems to be ASCII or non-UTF8,
                    // we read it using the user-specified encoding so it is saved again
                    // using that encoding.
                    if (IsUnicode(defaultEncoding)) {
                        // the file is not Unicode, so don't read it using Unicode even if the
                        // user has chosen Unicode as the default encoding.

                        // If we don't do this, SD will end up always adding a Byte Order Mark
                        // to ASCII files.
                        // NOTE(review): Encoding.Default is only a non-Unicode system code page on
                        // .NET Framework; on .NET Core / .NET 5+ it is UTF-8 - confirm the target runtime.
                        defaultEncoding = Encoding.Default; // use system encoding instead
                    }
                    return new StreamReader(fs, defaultEncoding);
                default:
                    return new StreamReader(fs);
            }
        }
    }
}
