Java 类 org.apache.commons.io.input.BOMInputStream 实例源码
项目:Gargoyle
文件:XmlFileReadModel.java
/**
 * Reads an XML resource, strips any leading BOM, pretty-prints the XML and
 * returns it as a list of lines.
 *
 * @param url the XML resource to read
 * @return the formatted XML, one entry per line
 * @throws IOException if the resource cannot be read
 */
@Override
public List<String> readLines(URL url) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // BOMInputStream silently skips a leading BOM so it never reaches the formatter.
    try (InputStream in = new BOMInputStream(url.openStream())) {
        // Copy in chunks instead of the original byte-at-a-time loop.
        byte[] buffer = new byte[8192];
        int read;
        while ((read = in.read(buffer)) != -1) {
            out.write(buffer, 0, read);
        }
    }
    // NOTE(review): out.toString() decodes with the platform default charset —
    // confirm the source documents always match it (an explicit charset would be safer).
    String string = out.toString();
    LOGGER.debug(string);
    XMLDiffFormatter xmlFormatter = new XMLDiffFormatter();
    String format = xmlFormatter.format(string);
    return Stream.of(format.split("\n")).collect(Collectors.toList());
}
项目:DigitalMediaServer
文件:PlaylistFolder.java
/**
 * Opens the playlist for reading with BOM stripping. M3U8 and cue sheets are
 * read as UTF-8; everything else as ISO-8859-1 (classic M3U convention).
 *
 * @return a reader over the playlist, or {@code null} for local files over ~10 MB
 * @throws IOException if the URL or file cannot be opened
 */
private BufferedReader getBufferedReader() throws IOException {
    String extension;
    Charset charset;
    if (FileUtil.isUrl(uri)) {
        extension = FileUtil.getUrlExtension(uri);
    } else {
        extension = FileUtil.getExtension(uri);
    }
    // Fixed: the original lowercased before null-checking, so a missing extension
    // threw an NPE and the later "extension != null" check was dead code.
    if (extension != null) {
        extension = extension.toLowerCase(PMS.getLocale());
    }
    // Fixed: the original compared against ".cue" (with a dot) which could never
    // match, since the sibling branch compares the bare extension "m3u8".
    if (extension != null && (extension.equals("m3u8") || extension.equals("cue"))) {
        charset = StandardCharsets.UTF_8;
    } else {
        charset = StandardCharsets.ISO_8859_1;
    }
    if (FileUtil.isUrl(uri)) {
        return new BufferedReader(new InputStreamReader(new BOMInputStream(new URL(uri).openStream()), charset));
    }
    File playlistfile = new File(uri);
    // Refuse to load very large local playlists to bound memory use.
    if (playlistfile.length() < 10000000) {
        return new BufferedReader(new InputStreamReader(new BOMInputStream(new FileInputStream(playlistfile)), charset));
    }
    return null;
}
项目:mojito
文件:CommandHelper.java
/**
 * Get content from {@link java.nio.file.Path}, honoring a BOM if present and
 * falling back to UTF-8 otherwise.
 *
 * @param path file to read
 * @return the decoded file content (BOM excluded)
 * @throws CommandException if the file cannot be read
 */
public String getFileContent(Path path) throws CommandException {
    try {
        File file = path.toFile();
        // try-with-resources: the original leaked the stream on every call.
        try (BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(file), false, boms)) {
            if (inputStream.hasBOM()) {
                // Decode with the charset implied by the detected BOM.
                return IOUtils.toString(inputStream, inputStream.getBOMCharsetName());
            }
            return IOUtils.toString(inputStream, StandardCharsets.UTF_8);
        }
    } catch (IOException e) {
        throw new CommandException("Cannot get file content for path: " + path.toString(), e);
    }
}
项目:dss
文件:TestBOM.java
/**
 * Verifies BOM stripping: the raw bytes of a BOM-prefixed file differ from the
 * BOM-less one, but after wrapping both in BOMInputStream the contents match.
 */
@Test
public void test() throws IOException {
    ApacheCommonsUtils acu = new ApacheCommonsUtils();
    // try-with-resources: the original leaked all four streams.
    try (FileInputStream fis = new FileInputStream(new File("src/test/resources/lotl_utf-8-sansbom.xml"));
            FileInputStream fisBom = new FileInputStream(new File("src/test/resources/lotl_utf-8.xml"))) {
        // Raw reads see the BOM bytes, so the contents must differ.
        assertNotEquals(acu.toBase64(acu.toByteArray(fis)), acu.toBase64(acu.toByteArray(fisBom)));
    }
    try (BOMInputStream bomIS = new BOMInputStream(
                new FileInputStream(new File("src/test/resources/lotl_utf-8-sansbom.xml")));
            BOMInputStream bomISSkipped = new BOMInputStream(
                new FileInputStream(new File("src/test/resources/lotl_utf-8.xml")))) {
        // With the BOM stripped, both files decode to identical bytes.
        assertEquals(acu.toBase64(acu.toByteArray(bomIS)), acu.toBase64(acu.toByteArray(bomISSkipped)));
    }
}
项目:crawler-commons
文件:SiteMapParser.java
/**
 * Process a text-based Sitemap. Text sitemaps only list URLs but no
 * priorities, last mods, etc.
 *
 * @param sitemapUrl
 *            URL to sitemap file
 * @param stream
 *            content stream
 * @return The site map
 * @throws IOException
 *             if there is an error reading in the site map content
 */
protected SiteMap processText(URL sitemapUrl, InputStream stream) throws IOException {
    LOG.debug("Processing textual Sitemap");
    SiteMap siteMap = new SiteMap(sitemapUrl);
    siteMap.setType(SitemapType.TEXT);
    // Strip any byte-order mark so the first URL line parses cleanly.
    @SuppressWarnings("resource")
    BufferedReader lineReader = new BufferedReader(
            new InputStreamReader(new BOMInputStream(stream), UTF_8));
    int index = 1;
    for (String line = lineReader.readLine(); line != null; line = lineReader.readLine()) {
        // Skip blank lines; stop adding once the URL cap is reached.
        if (!line.isEmpty() && index <= MAX_URLS) {
            addUrlIntoSitemap(line, siteMap, null, null, null, index++);
        }
    }
    siteMap.setProcessed(true);
    return siteMap;
}
项目:crawler-commons
文件:SiteMapParser.java
/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 *
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws UnknownFormatException
 *             if there is an error parsing the gzip
 * @throws IOException
 *             if there is an error reading in the gzip {@link java.net.URL}
 */
protected AbstractSiteMap processGzippedXML(URL url, byte[] response) throws IOException, UnknownFormatException {
    LOG.debug("Processing gzipped XML");
    // The sitemap's logical location is the URL without its ".gz" suffix.
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");
    LOG.debug("XML url = {}", xmlUrl);
    // Inflate the payload, then strip any BOM before handing it to the XML parser.
    InputStream inflated = new GZIPInputStream(new ByteArrayInputStream(response));
    InputSource source = new InputSource(new BOMInputStream(inflated));
    source.setSystemId(xmlUrl);
    return processXml(url, source);
}
项目:easyjasub
文件:InputTextSubFile.java
/**
 * Parses a subtitle stream into the internal caption list.
 *
 * @param inputFormat subtitle format used to pick a parser via createFormat
 * @param fileName name of the subtitle file, passed to the parser for messages
 * @param is raw subtitle content; a leading BOM is stripped before parsing
 * @throws InputTextSubException if the subtitle library reports a fatal parse error
 * @throws IOException if the stream cannot be read
 */
public InputTextSubFile(SubtitleFileType inputFormat, String fileName,
InputStream is) throws InputTextSubException, IOException {
try {
tto = createFormat(inputFormat).parseFile(fileName,
new BOMInputStream(is));
} catch (FatalParsingException ex) {
throw new InputTextSubException(
"Parse error returned by subtitle read library", ex);
}
// Copy each parsed caption into our own line model, converting the
// library's start/end times to milliseconds.
captions = new ArrayList<InputSubtitleLine>(tto.captions.size());
for (Caption caption : tto.captions.values()) {
InputSubtitleLine line = new InputSubtitleLine();
line.setContent(caption.content);
line.setStartTime(new SubtitleFileTimeWrapper(caption.start)
.getMSeconds());
line.setEndTime(new SubtitleFileTimeWrapper(caption.end)
.getMSeconds());
captions.add(line);
}
}
项目:org.fastnate
文件:AbstractCsvReader.java
/**
 * Opens a CSV file.
 *
 * If the given file ends with "gz", then the file is decompressed before using a {@link GZIPInputStream}.
 * A Unicode BOM, if present, selects the charset; otherwise the configured
 * default encoding is assumed.
 *
 * @param importFile
 *            the csv file
 * @return a list reader
 * @throws IOException
 *             on io exception
 */
@SuppressWarnings("resource")
protected CsvListReader openCsvListReader(final File importFile) throws IOException {
    // Open file
    InputStream fileStream = new FileInputStream(importFile);
    try {
        // Check for compressed file
        if (importFile.getName().toLowerCase().endsWith(".gz")) {
            fileStream = new GZIPInputStream(fileStream);
        }
        // Guess the encoding from an optional BOM; "false" excludes the BOM bytes
        // from the data the CSV reader sees.
        final BOMInputStream inputStream = new BOMInputStream(fileStream, false, ByteOrderMark.UTF_8,
                ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
        final String charset;
        if (inputStream.hasBOM()) {
            charset = inputStream.getBOMCharsetName();
            log.info("BOM detected. Using {} as encoding", charset);
        } else {
            charset = getDefaultEncoding().toString();
            log.info("No BOM detected. Assuming {} as encoding", charset);
        }
        final Reader reader = new InputStreamReader(inputStream, charset);
        return new CsvListReader(reader, new CsvPreference.Builder(CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE)
                .skipComments(new CommentMatches("(//|/\\*|#|;).*")).build());
    } catch (final IOException e) {
        // Fixed: don't leak the file handle when GZIP/BOM probing fails mid-way.
        fileStream.close();
        throw e;
    }
}
项目:dclib
文件:ConverterService.java
/**
 * Simple invocation. Load template and data from a file, run process
 * and return memory model containing results or null if there was a problem.
 * Problems/progress reporting live to given reporter
 * @param templateFile the name of the template file to use
 * @param dataFile the name of the data file to process
 * @param reporter the message reporter
 * @param debug set to true to enable voluminous debug message
 * @param allowNullRows set to true to allow output even if some rows don't match
 * @throws IOException if the template or data file cannot be read
 */
public Model simpleConvert(String templateFile, String dataFile, ProgressMonitorReporter reporter, boolean debug, boolean allowNullRows) throws IOException {
    Template template = TemplateFactory.templateFrom(templateFile, dc);
    File dataFileF = new File(dataFile);
    String filename = dataFileF.getName();
    String filebasename = NameUtils.removeExtension(filename);
    put(ConverterProcess.FILE_NAME, filename);
    put(ConverterProcess.FILE_BASE_NAME, filebasename);
    // try-with-resources: the original never closed the data-file stream.
    try (InputStream is = new BOMInputStream(new FileInputStream(dataFileF))) {
        ConverterProcess process = new ConverterProcess(dc, is);
        process.setDebug(debug);
        process.setTemplate(template);
        process.setMessageReporter(reporter);
        process.setAllowNullRows(allowNullRows);
        boolean ok = process.process();
        return ok ? process.getModel() : null;
    }
}
项目:rosa
文件:JsonldJenaUtils.java
/**
 * Generate a single Jena model by reading and merging several RDF resources
 * from the classpath.
 *
 * @param aggr resource names ("name.extension") of the RDF files to merge
 * @param lang the RDF serialization language understood by Jena
 * @return the aggregate model
 * @throws IOException if a resource is missing or cannot be read
 */
public static Model generateAggregateModel(String[] aggr, String lang)
        throws IOException {
    Model model = ModelFactory.createDefaultModel();
    Model subModel = ModelFactory.createDefaultModel();
    for (String resourceName : aggr) {
        InputStream in = (JsonldJenaUtils.class).getClassLoader().getResourceAsStream(resourceName);
        // Fail with a clear message instead of the NPE the original threw
        // when a resource was missing from the classpath.
        if (in == null) {
            throw new IOException("Classpath resource not found: " + resourceName);
        }
        // try-with-resources: the original leaked both streams on a read error.
        // Closing the BOM wrapper also closes the underlying stream.
        try (BOMInputStream bIn = new BOMInputStream(in, false)) {
            subModel.read(bIn, null, lang);
            model = model.add(subModel);
            // Reuse the scratch model for the next file.
            subModel.removeAll();
        }
    }
    return model;
}
项目:elasticsearch-river-remote
文件:SiteMapParser.java
/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 *
 * @param url - URL of the gzipped content
 * @param response - Gzipped content
 * @throws MalformedURLException
 * @throws IOException
 * @throws UnknownFormatException
 */
private AbstractSiteMap processGzip(URL url, byte[] response) throws MalformedURLException, IOException,
        UnknownFormatException {
    logger.debug("Processing gzip");
    InputStream compressed = new ByteArrayInputStream(response);
    // The sitemap's logical location is the URL without its ".gz" suffix.
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");
    logger.debug("XML url = " + xmlUrl);
    // Inflate the payload and strip any BOM before XML parsing.
    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(compressed));
    InputSource source = new InputSource(decompressed);
    source.setSystemId(xmlUrl);
    AbstractSiteMap result = processXml(url, source);
    decompressed.close();
    return result;
}
项目:file-type-plugin
文件:FileType.java
/**
 * Reports the stream's byte-order mark, if any, as a display suffix.
 * Note: probing consumes the BOM bytes from {@code source}.
 */
private String showByteOfMark(InputStream source) throws IOException {
    ByteOrderMark mark = new BOMInputStream(source).getBOM();
    if (mark == null) {
        return "";
    }
    String bom = mark.toString();
    FileType.logger.log(Level.INFO, "BOM: {0}", bom);
    return " w/ " + bom;
}
项目:instalint
文件:FileMetadata.java
/**
 * Opens the file for reading, transparently skipping any known Unicode BOM.
 * A missing file is reported as an unchecked IllegalStateException.
 */
private static InputStream streamFile(File file) {
    try {
        FileInputStream raw = new FileInputStream(file);
        return new BOMInputStream(raw,
                ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
                ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
    } catch (FileNotFoundException e) {
        throw new IllegalStateException("File not found: " + file.getAbsolutePath(), e);
    }
}
项目:file-format-streaming-converter
文件:XlsxToCsvConverterTest.java
/** Builds a CSV parser over a UTF-8 file (BOM tolerated) using the given delimiter. */
private CSVParser createCsvParser(String inputFileName, String delimiter) throws IOException {
    BufferedReader contentReader = new BufferedReader(new InputStreamReader(
            new BOMInputStream(new FileInputStream(inputFileName)), Charsets.UTF_8));
    // Header row is parsed for column names but not returned as a record.
    CSVFormat format = CSVFormat.newFormat(delimiter.charAt(0))
            .withHeader()
            .withSkipHeaderRecord()
            .withIgnoreEmptyLines()
            .withAllowMissingColumnNames()
            .withQuote('"');
    return new CSVParser(contentReader, format);
}
项目:jijimaku
文件:SubtitleFile.java
/**
 * Parses subtitle contents (format chosen by the file extension: "ass" or
 * "srt") and prepares the timed text: styles from stylesStr are applied,
 * every caption gets the "Default" style, and a jijimaku marker is added.
 *
 * @param fileName subtitle file name; the extension selects the parser
 * @param fileContents full subtitle text, encoded here as UTF-8 for the parser
 * @param stylesStr raw style definitions handed to parseStyles
 * @throws IOException if the in-memory stream cannot be read
 * @throws FatalParsingException if the subtitle library reports a fatal error
 */
public SubtitleFile(String fileName, String fileContents, String stylesStr) throws IOException, FatalParsingException {
LOGGER.debug("Parsing subtitle file {}", fileName);
TimedTextFileFormat timedTextFormat;
switch (FilenameUtils.getExtension(fileName)) {
case "ass":
timedTextFormat = new FormatASS();
break;
case "srt":
timedTextFormat = new FormatSRT();
break;
default:
LOGGER.error("invalid subtitle file extension file: {}", fileName);
// NOTE(review): UnexpectedError is not in the throws clause — presumably unchecked.
throw new UnexpectedError();
}
// Convert String to InputStream to match subtitleFile API
byte[] byteData = fileContents.getBytes("UTF-8");
// Must use BOMInputStream otherwise files with BOM will broke :(((
// => http://stackoverflow.com/questions/4897876/reading-utf-8-bom-marker
try (BOMInputStream inputStream = new BOMInputStream(new ByteArrayInputStream(byteData))) {
timedText = timedTextFormat.parseFile(fileName, inputStream, StandardCharsets.UTF_8);
}
// The library always emits a fixed header in "warnings"; anything longer than
// the bare header means real warnings were produced during parsing.
if (timedText.warnings.length() > "List of non fatal errors produced during parsing:\n\n".length()) {
LOGGER.warn("There was some warnings during parsing. See logs.");
LOGGER.debug("Got warnings: {}", "\n" + timedText.warnings);
}
styles = parseStyles(stylesStr);
timedText.styling = styles;
timedText.description = JIJIMAKU_SIGNATURE;
annotationCaptions = new TreeMap<>();
// Initialization: add jijimaku mark and set style to Default
addJijimakuMark();
timedText.captions.values().stream().forEach(c -> c.style = styles.get("Default"));
captionIter = timedText.captions.entrySet().iterator();
}
项目:mojito
文件:CommandHelper.java
/**
 * Writes the content into a file using same format as source file: if the
 * source starts with a BOM, the BOM and its charset are replicated; otherwise
 * the content is written as UTF-8.
 *
 * @param content content to be written
 * @param path path to the file
 * @param sourceFileMatch source file whose BOM/encoding should be replicated
 * @throws CommandException if the source cannot be read or the target written
 */
public void writeFileContent(String content, Path path, FileMatch sourceFileMatch) throws CommandException {
    try {
        File outputFile = path.toFile();
        // try-with-resources: the original leaked the source-file stream.
        try (BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(sourceFileMatch.getPath().toFile()), false, boms)) {
            if (inputStream.hasBOM()) {
                // Write the source BOM first, then append the content encoded
                // in the charset the BOM implies.
                FileUtils.writeByteArrayToFile(outputFile, inputStream.getBOM().getBytes());
                FileUtils.writeByteArrayToFile(outputFile, content.getBytes(inputStream.getBOMCharsetName()), true);
            } else {
                FileUtils.writeStringToFile(outputFile, content, StandardCharsets.UTF_8);
            }
        }
    } catch (IOException e) {
        throw new CommandException("Cannot write file content in path: " + path.toString(), e);
    }
}
项目:georocket
文件:MimeTypeUtils.java
/**
 * Read the first bytes of the given file and try to determine the file
 * format. Read up to 100 KB before giving up.
 * @param f the file to read
 * @return the file format (or <code>null</code> if the format
 * could not be determined)
 * @throws IOException if the input stream could not be read
 */
public static String detect(File f) throws IOException {
    if (!f.exists()) {
        return null;
    }
    // Strip any BOM so format sniffing sees the real first content bytes.
    try (BufferedInputStream in = new BufferedInputStream(
            new BOMInputStream(new FileInputStream(f)))) {
        return determineFileFormat(in);
    }
}
项目:Open-Clinica-Data-Uploader
文件:UploadController.java
/**
 * Stores an uploaded file in the system temp directory, stripping any BOM.
 *
 * @param file the uploaded file
 * @return the path the file was written to
 * @throws IOException if the upload cannot be copied
 */
private Path saveFile(MultipartFile file) throws IOException {
    // Security: getOriginalFilename() is client-controlled. Keep only the last
    // path segment so names like "../../x" cannot escape the temp directory.
    String filename = Paths.get(file.getOriginalFilename()).getFileName().toString();
    String directory = System.getProperty("java.io.tmpdir");
    String filepath = Paths.get(directory, filename).toString();
    // Save the file locally; "false" excludes any BOM bytes from the copy.
    try (BufferedOutputStream stream =
            new BufferedOutputStream(new FileOutputStream(new File(filepath)));
            BOMInputStream bis = new BOMInputStream(file.getInputStream(), false)) {
        IOUtils.copy(bis, stream);
    }
    return Paths.get(filepath);
}
项目:webz-server
文件:FileDownloaderWithBOM.java
/**
 * Wraps a downloader's stream in BOM detection: if a BOM is present, the
 * encoding and byte count are derived from it; otherwise the supplied
 * default encoding and the file's raw size are used.
 *
 * @param downloader source of the raw bytes and file metadata
 * @param defaultEncoding encoding to assume when no BOM is found
 */
public FileDownloaderWithBOM(WebzInputStreamDownloader downloader, String defaultEncoding) throws IOException, WebzException {
    // "false" => BOM bytes are not passed through; ALL_BOMS lists the marks we detect.
    // (Removed the original's redundant cast — the constructor already returns BOMInputStream.)
    this.bomIn = new BOMInputStream(downloader.getInputStream(), false, ALL_BOMS);
    this.downloader = new FileDownloader(downloader.getFileSpecific(), bomIn);
    ByteOrderMark bom = bomIn.getBOM();
    if (bom == null) {
        actualEncoding = defaultEncoding;
        actualNumberOfBytes = downloader.getFileSpecific().getNumberOfBytes();
    } else {
        actualEncoding = bom.getCharsetName();
        // The BOM is stripped from the stream, so subtract it from the byte count.
        actualNumberOfBytes = downloader.getFileSpecific().getNumberOfBytes() - bom.length();
    }
    reader = new InputStreamReader(bomIn, actualEncoding);
}
项目:spring-usc
文件:EncodingDetector.java
/**
 * Opens {@code file} for reading in the given encoding, discarding any BOM.
 */
public static InputStreamReader getInputStreamReader(File file, String encoding) throws IOException {
    FileInputStream rawStream = new FileInputStream(file);
    logger.debug("Reading file: " + file + " using encoding: " + encoding);
    // BOMInputStream removes the byte-order mark so it never reaches callers.
    return new InputStreamReader(new BOMInputStream(rawStream), encoding);
}
项目:dwca-io
文件:DwcMetaFiles.java
/**
 * Read the provided meta descriptor (e.g. meta.xml) and return a {@link Archive}.
 * A leading BOM is stripped before the SAX parse; the stream is closed here.
 *
 * NOTE(review): the parser comes from SAX_FACTORY, configured outside this
 * method — if the factory does not disable DTDs/external entities, descriptors
 * from untrusted sources are exposed to XXE. Confirm the factory configuration.
 *
 * @param metaDescriptor stream containing the XML meta descriptor
 * @throws SAXException on XML parse errors (parser configuration problems are wrapped)
 * @throws IOException
 * @throws UnsupportedArchiveException
 * @return a new {@link Archive}, never null
 */
public static Archive fromMetaDescriptor(InputStream metaDescriptor) throws SAXException, IOException, UnsupportedArchiveException {
Archive archive = new Archive();
try (BOMInputStream bomInputStream = new BOMInputStream(metaDescriptor)) {
SAXParser p = SAX_FACTORY.newSAXParser();
// The handler populates "archive" as elements are parsed.
MetaXMLSaxHandler mh = new MetaXMLSaxHandler(archive);
p.parse(bomInputStream, mh);
} catch (ParserConfigurationException e) {
throw new SAXException(e);
}
return archive;
}
项目:digidoc4j
文件:AsicContainerParser.java
/**
 * Reads the container's mimetype entry (BOM tolerated) into the mimeType
 * field and registers the entry's document.
 *
 * @param entry the zip entry holding the mimetype
 * @throws TechnicalException if the entry cannot be read
 */
private void extractMimeType(ZipEntry entry) {
    // try-with-resources: the original never closed either stream.
    try (InputStream zipFileInputStream = getZipEntryInputStream(entry);
            BOMInputStream bomInputStream = new BOMInputStream(zipFileInputStream)) {
        DSSDocument document = new InMemoryDocument(bomInputStream);
        mimeType = StringUtils.trim(IOUtils.toString(getDocumentBytes(document), "UTF-8"));
        extractAsicEntry(entry, document);
    } catch (IOException e) {
        logger.error("Error parsing container mime type: " + e.getMessage());
        throw new TechnicalException("Error parsing container mime type: " + e.getMessage(), e);
    }
}
项目:srclib-java
文件:Resolver.java
/**
 * Tries to fetch POM model from maven central for a given dependency
 * @param dependency dependency to fetch model to
 * @return POM model if found and valid
 * @throws IOException if the POM cannot be downloaded
 * @throws XmlPullParserException if the downloaded POM is not valid XML
 */
private static Model fetchModel(RawDependency dependency)
        throws IOException, XmlPullParserException {
    // Get the url to the POM file for this artifact.
    // NOTE(review): plain HTTP — consider https (repo1.maven.org) to prevent
    // tampering with the downloaded POM in transit.
    String url = "http://central.maven.org/maven2/"
            + dependency.groupID.replace('.', '/') + '/' + dependency.artifactID + '/'
            + dependency.version + '/' + dependency.artifactID + '-' + dependency.version + ".pom";
    // try-with-resources: the original leaked the connection when read() threw.
    try (InputStream input = new BOMInputStream(new URL(url).openStream())) {
        MavenXpp3Reader xpp3Reader = new MavenXpp3Reader();
        return xpp3Reader.read(input);
    }
}
项目:commons-csv
文件:CSVParserTest.java
/**
 * The parser must read a BOM-prefixed CSV correctly when handed the raw
 * stream: header lookup by name only works if the BOM was stripped.
 */
@Test
public void testBOMInputStream_ParserWithInputStream() throws IOException {
    try (final BOMInputStream inputStream = createBOMInputStream("CSVFileParser/bom.csv");
            final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) {
        for (final CSVRecord record : parser) {
            final String dateValue = record.get("Date");
            Assert.assertNotNull(dateValue);
        }
    }
}
项目:es6draft
文件:ChakraTest.java
/**
 * Maps the stream's byte-order mark to its charset. Both a UTF-8 BOM and
 * the absence of any recognized BOM resolve to UTF-8.
 */
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    ByteOrderMark bom = bis.getBOM();
    if (ByteOrderMark.UTF_16LE.equals(bom)) {
        return StandardCharsets.UTF_16LE;
    }
    if (ByteOrderMark.UTF_16BE.equals(bom)) {
        return StandardCharsets.UTF_16BE;
    }
    // UTF-8 BOM, no BOM, and anything unrecognized all fall through to UTF-8,
    // exactly as in the original branch ordering.
    return StandardCharsets.UTF_8;
}
项目:olca-modules
文件:AbstractImport.java
/**
 * Imports a CSV file into the database. The file is read as UTF-8 with any
 * byte-order mark excluded; fields are semicolon-separated, quoted with '"'.
 */
public void run(File file, Seq seq, IDatabase database) throws Exception {
    this.seq = seq;
    this.database = database;
    CsvPreference pref = new CsvPreference.Builder('"', ';', "\n").build();
    try (FileInputStream fis = new FileInputStream(file);
            // exclude the byte order mark, if there is any
            BOMInputStream bom = new BOMInputStream(fis, false, ByteOrderMark.UTF_8);
            BufferedReader buffer = new BufferedReader(new InputStreamReader(bom, "utf-8"));
            CsvListReader csvReader = new CsvListReader(buffer, pref)) {
        importFile(csvReader, database);
    }
}
项目:olca-modules
文件:Maps.java
/**
 * Wraps a raw byte stream in a semicolon-delimited CSV reader, decoding as
 * UTF-8 and dropping a UTF-8 BOM if one is present.
 */
private static CsvListReader createReader(InputStream stream)
        throws Exception {
    CsvPreference pref = new CsvPreference.Builder('"', ';', "\n").build();
    // exclude the byte order mark, if there is any
    BOMInputStream bomFree = new BOMInputStream(stream, false, ByteOrderMark.UTF_8);
    InputStreamReader decoded = new InputStreamReader(bomFree, "utf-8");
    return new CsvListReader(new BufferedReader(decoded), pref);
}
项目:storm-crawler
文件:CharsetIdentification.java
/**
 * Detects any BOMs and returns the corresponding charset
 */
private static String getCharsetFromBOM(final byte[] byteData) {
    try {
        ByteOrderMark mark = new BOMInputStream(
                new ByteArrayInputStream(byteData)).getBOM();
        return mark == null ? null : mark.getCharsetName();
    } catch (IOException e) {
        // Unreadable prefix: no charset can be derived.
        return null;
    }
}
项目:pentaho-kettle
文件:CsvInput.java
/**
 * Reads the header line of a CSV file and returns the field names found there.
 * A UTF-8/UTF-16 BOM, if present, is consumed by BOMInputStream so it cannot
 * corrupt the first field name.
 *
 * @param fileName the file to inspect (resolved through KettleVFS)
 * @param csvInputMeta step settings supplying delimiter, enclosure, encoding and escape character
 * @return the trimmed field names from the first line, enclosures removed
 * @throws KettleException if the file cannot be opened or read
 */
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws KettleException {
String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );
try ( FileObject fileObject = KettleVFS.getFileObject( fileName, getTransMeta() );
BOMInputStream inputStream =
new BOMInputStream( KettleVFS.getInputStream( fileObject ), ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE,
ByteOrderMark.UTF_16BE ) ) {
InputStreamReader reader = null;
// Fall back to the platform default charset when no encoding is configured.
if ( Utils.isEmpty( realEncoding ) ) {
reader = new InputStreamReader( inputStream );
} else {
reader = new InputStreamReader( inputStream, realEncoding );
}
EncodingType encodingType = EncodingType.guessEncodingType( reader.getEncoding() );
// Read only the first (header) line, treated as UNIX line format.
String line =
TextFileInput.getLine( log, reader, encodingType, TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder(
1000 ) );
String[] fieldNames =
CsvInput.guessStringsFromLine( log, line, delimiter, enclosure, csvInputMeta.getEscapeCharacter() );
// Strip configured enclosure characters from the extracted names.
if ( !Utils.isEmpty( csvInputMeta.getEnclosure() ) ) {
removeEnclosure( fieldNames, csvInputMeta.getEnclosure() );
}
trimFieldNames( fieldNames );
return fieldNames;
} catch ( IOException e ) {
throw new KettleFileException( BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), e );
}
}
项目:elasticsearch-river-remote
文件:SiteMapParser.java
/**
 * Process a text-based Sitemap. Text sitemaps only list URLs but no priorities, last mods, etc.
 *
 * @param content raw sitemap bytes; an optional BOM is stripped
 * @param sitemapUrl URL the sitemap was fetched from
 * @return the populated text sitemap, marked processed
 * @throws IOException if the content cannot be read
 */
private SiteMap processText(byte[] content, String sitemapUrl) throws IOException {
    logger.debug("Processing textual Sitemap");
    SiteMap textSiteMap = new SiteMap(sitemapUrl);
    textSiteMap.setType(SitemapType.TEXT);
    BOMInputStream bomIs = new BOMInputStream(new ByteArrayInputStream(content));
    @SuppressWarnings("resource")
    BufferedReader reader = new BufferedReader(new InputStreamReader(bomIs));
    String line;
    int i = 1;
    while ((line = reader.readLine()) != null) {
        if (line.length() > 0 && i <= MAX_URLS) {
            try {
                URL url = new URL(line);
                boolean valid = urlIsLegal(textSiteMap.getBaseUrl(), url.toString());
                // In non-strict mode, URLs failing the legality check are kept
                // but flagged invalid.
                if (valid || !strict) {
                    if (logger.isDebugEnabled()) {
                        // Fixed idiom: StringBuilder instead of the original
                        // synchronized StringBuffer (single-threaded use).
                        StringBuilder sb = new StringBuilder(" ");
                        sb.append(i).append(". ").append(url);
                        logger.debug(sb.toString());
                    }
                    i++;
                    SiteMapURL surl = new SiteMapURL(url, valid);
                    textSiteMap.addSiteMapUrl(surl);
                }
            } catch (MalformedURLException e) {
                logger.debug("Bad URL [" + line + "].");
            }
        }
    }
    textSiteMap.setProcessed(true);
    return textSiteMap;
}
项目:languagetool
文件:Main.java
/**
 * Loads a text file into the editor. If the file starts with a BOM, the
 * corresponding charset is used and the BOM is remembered in the "bom" field;
 * otherwise the charset name is left null for StringTools.readStream to handle.
 * The file is also pushed onto the persisted recent-files list (deduplicated).
 * I/O errors are shown to the user instead of propagating.
 */
private void loadFile(File file) {
try (FileInputStream inputStream = new FileInputStream(file)) {
// "false" => the BOM bytes are excluded from the data read below.
BOMInputStream bomIn = new BOMInputStream(inputStream, false,
ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
String charsetName;
if (bomIn.hasBOM()) {
bom = bomIn.getBOM();
charsetName = bom.getCharsetName();
} else {
// No BOM found
bom = null;
charsetName = null;
}
String fileContents = StringTools.readStream(bomIn, charsetName);
textArea.setText(fileContents);
currentFile = file;
updateTitle();
// Move an already-known file to the end of the list (most recent last).
if(recentFiles.contains(file.getAbsolutePath())) {
recentFiles.remove(file.getAbsolutePath());
}
recentFiles.add(file.getAbsolutePath());
localStorage.saveProperty("recentFiles", recentFiles);
updateRecentFilesMenu();
} catch (IOException e) {
Tools.showError(e);
}
}
项目:languagetool
文件:Main.java
/**
 * Builds a reader for the given file, or for stdin when isStdIn(filename).
 * A BOM detected in a file overrides the platform default charset, but an
 * explicitly supplied encoding always wins.
 */
private InputStreamReader getInputStreamReader(String filename, String encoding) throws IOException {
    String charsetName = encoding != null ? encoding : Charset.defaultCharset().name();
    InputStream is = System.in;
    if (!isStdIn(filename)) {
        // "true" keeps the BOM bytes in the stream; the reader decodes them away.
        BOMInputStream bomIn = new BOMInputStream(new FileInputStream(new File(filename)), true,
                ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE,
                ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
        if (bomIn.hasBOM() && encoding == null) {
            charsetName = bomIn.getBOMCharsetName();
        }
        is = bomIn;
    }
    return new InputStreamReader(new BufferedInputStream(is), charsetName);
}
项目:sparql-generate
文件:LocatorURLAccept.java
/**
 * Opens the connection's stream, manually following HTTP redirects so each
 * hop can be validated: only http/https targets are allowed, https-to-http
 * downgrades are rejected, and at most 5 redirections are followed.
 *
 * @param c the initial connection (reassigned on each redirect)
 * @return the final stream (BOM stripped) with its content type and encoding
 * @throws IOException if any hop cannot be read
 */
private TypedInputStream openConnectionCheckRedirects(URLConnection c) throws IOException {
boolean redir;
int redirects = 0;
InputStream in = null;
String contentType = null;
String contentEncoding = null;
do {
if (c instanceof HttpURLConnection) {
// Disable automatic redirects so every hop passes our checks below.
((HttpURLConnection) c).setInstanceFollowRedirects(false);
}
// We want to open the input stream before getting headers
// because getHeaderField() et al swallow IOExceptions.
in = new BufferedInputStream(new BOMInputStream(c.getInputStream()));
contentType = c.getContentType();
contentEncoding = c.getContentEncoding();
redir = false;
if (c instanceof HttpURLConnection) {
HttpURLConnection http = (HttpURLConnection) c;
int stat = http.getResponseCode();
// 3xx statuses are redirects, except 304 (Not Modified) and 306 (unused).
if (stat >= 300 && stat <= 307 && stat != 306
&& stat != HttpURLConnection.HTTP_NOT_MODIFIED) {
URL base = http.getURL();
String loc = http.getHeaderField("Location");
URL target = null;
if (loc != null) {
// Resolve a possibly relative Location against the current URL.
target = new URL(base, loc);
}
http.disconnect();
// Redirection should be allowed only for HTTP and HTTPS
// and should be limited to 5 redirections at most.
if (target == null
|| !(target.getProtocol().equals("http") || target.getProtocol().equals("https"))
|| c.getURL().getProtocol().equals("https") && target.getProtocol().equals("http")
|| redirects >= 5) {
throw new SecurityException("illegal URL redirect");
}
redir = true;
c = target.openConnection();
redirects++;
}
}
} while (redir);
// Default the content type when the server did not provide one.
if(contentType==null) {
contentType = "text/plain";
}
return new TypedInputStream(in, contentType, contentEncoding);
}
项目:AniML
文件:DataTable.java
/**
 * Loads a CSV/TSV/MySQL/Excel-format file into a DataTable via commons-csv.
 * A byte-order mark at the start of the file is handled transparently, and
 * column types/names inferred from the data can be overridden by the caller.
 *
 * @param fileName file to load (read as UTF-8)
 * @param formatType one of "tsv", "mysql", "excel", "rfc4180" (null/unknown => RFC 4180)
 * @param colTypesOverride if non-null, replaces the inferred column types
 * @param colNamesOverride if non-null, replaces the header-derived column names
 * @param hasHeaderRow whether the first record is a header
 * @throws IllegalArgumentException wrapping any failure to open/read/parse
 */
public static DataTable loadCSV(String fileName, String formatType, VariableType[] colTypesOverride, String[] colNamesOverride, boolean hasHeaderRow) {
try {
// use apache commons io + csv to load but convert to list of String[]
// byte-order markers are handled if present at start of file.
FileInputStream fis = new FileInputStream(fileName);
final Reader reader = new InputStreamReader(new BOMInputStream(fis), "UTF-8");
CSVFormat format;
if ( formatType==null ) {
format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180;
}
else {
switch ( formatType.toLowerCase() ) {
case "tsv":
format = hasHeaderRow ? CSVFormat.TDF.withHeader() : CSVFormat.TDF;
break;
case "mysql":
format = hasHeaderRow ? CSVFormat.MYSQL.withHeader() : CSVFormat.MYSQL;
break;
case "excel":
format = hasHeaderRow ? CSVFormat.EXCEL.withHeader() : CSVFormat.EXCEL;
break;
case "rfc4180":
default:
format = hasHeaderRow ? CSVFormat.RFC4180.withHeader() : CSVFormat.RFC4180;
break;
}
}
final CSVParser parser = new CSVParser(reader, format);
List<String[]> rows = new ArrayList<>();
int numHeaderNames = parser.getHeaderMap().size();
try {
// Materialize every record as a String[] row.
for (final CSVRecord record : parser) {
String[] row = new String[record.size()];
for (int j = 0; j<record.size(); j++) {
row[j] = record.get(j);
}
rows.add(row);
}
}
finally {
parser.close();
reader.close();
}
// Header map was captured by the parser before closing, so this is safe.
VariableType[] actualTypes = computeColTypes(rows, numHeaderNames);
Set<String> colNameSet = parser.getHeaderMap().keySet();
String[] colNames = colNameSet.toArray(new String[colNameSet.size()]);
if ( colNamesOverride!=null ) {
colNames = colNamesOverride;
}
if ( colTypesOverride!=null ) {
actualTypes = colTypesOverride;
}
return fromStrings(rows, actualTypes, colNames, false);
}
catch (Exception e) {
throw new IllegalArgumentException("Can't open and/or read "+fileName, e);
}
}
项目:AniML
文件:DataTable.java
/**
 * Loads a comma-separated file into a DataTable using a hand-rolled splitter
 * (no quoting support — values must not contain commas). The file is read
 * as UTF-8 with any byte-order mark skipped.
 *
 * @param colTypes expected type of each column; also fixes the column count
 * @param hasHeaderRow whether the first line holds column names
 * @throws IllegalArgumentException wrapping any I/O failure
 */
public static DataTable loadCSV(String fileName, VariableType[] colTypes, boolean hasHeaderRow) {
    int numCols = colTypes.length;
    // try-with-resources: the original never closed the file.
    try (BufferedReader bf = new BufferedReader(new InputStreamReader(
            new BOMInputStream(new FileInputStream(fileName)), "UTF-8"))) {
        List<int[]> rows = new ArrayList<>();
        String line;
        String[] colNames = null;
        if ( hasHeaderRow ) {
            line = bf.readLine();
            if ( line!=null ) {
                line = line.trim();
                if ( line.length()>0 ) {
                    colNames = line.split(",");
                    for (int i = 0; i<colNames.length; i++) {
                        colNames[i] = colNames[i].trim();
                    }
                }
            }
        }
        int n = 0;
        while ( (line=bf.readLine())!=null ) {
            // Progress output for very large files.
            if ( n>0 && n % 10000 == 0 ) System.out.println(n);
            line = line.trim();
            if ( line.length()==0 ) continue;
            int[] row = new int[numCols];
            // Manual split on commas: walk comma positions, converting each
            // field via getValue for its declared column type.
            int comma = line.indexOf(',', 0);
            int prev = 0;
            int col = 0;
            while ( comma>=0 ) {
                String v = line.substring(prev, comma);
                row[col] = getValue(colTypes[col], v);
                prev = comma+1;
                comma = line.indexOf(',', comma+1);
                col++;
            }
            // grab last element after last comma
            String lastv = line.substring(prev, line.length());
            row[col] = getValue(colTypes[col], lastv);
            rows.add(row);
            n++;
        }
        return new DataTable(rows, colTypes, colNames, null);
    }
    catch (IOException ioe) {
        throw new IllegalArgumentException("Can't open and/or read "+fileName, ioe);
    }
}
项目:reference-ccda-validator
文件:ReferenceCCDAValidationService.java
/**
 * Runs the validation pipeline over an uploaded C-CDA file: MDHT validation
 * always runs; vocabulary validation runs only when MDHT found no schema
 * errors and the objective allows it; content validation additionally
 * requires an objective that allows it. Skipped stages are logged.
 *
 * @param validationObjective drives which optional validators run
 * @param referenceFileName reference file passed through to each validator
 * @param ccdaFile the uploaded document (a leading BOM is stripped before reading)
 * @return the accumulated results of all validators that ran
 */
private List<RefCCDAValidationResult> runValidators(String validationObjective, String referenceFileName,
MultipartFile ccdaFile) throws SAXException, Exception {
List<RefCCDAValidationResult> validatorResults = new ArrayList<>();
InputStream ccdaFileInputStream = null;
try {
ccdaFileInputStream = ccdaFile.getInputStream();
String ccdaFileContents = IOUtils.toString(new BOMInputStream(ccdaFileInputStream));
List<RefCCDAValidationResult> mdhtResults = doMDHTValidation(validationObjective, referenceFileName, ccdaFileContents);
if(mdhtResults != null && !mdhtResults.isEmpty()) {
logger.info("Adding MDHT results");
validatorResults.addAll(mdhtResults);
}
// Vocabulary (and content) validation is pointless on schema-broken documents.
boolean isSchemaErrorInMdhtResults = mdhtResultsHaveSchemaError(mdhtResults);
boolean isObjectiveAllowingVocabularyValidation = objectiveAllowsVocabularyValidation(validationObjective);
if (!isSchemaErrorInMdhtResults && isObjectiveAllowingVocabularyValidation) {
List<RefCCDAValidationResult> vocabResults = doVocabularyValidation(validationObjective, referenceFileName, ccdaFileContents);
if(vocabResults != null && !vocabResults.isEmpty()) {
logger.info("Adding Vocabulary results");
validatorResults.addAll(vocabResults);
}
if(objectiveAllowsContentValidation(validationObjective)) {
List<RefCCDAValidationResult> contentResults = doContentValidation(validationObjective, referenceFileName, ccdaFileContents);
if(contentResults != null && !contentResults.isEmpty()) {
logger.info("Adding Content results");
validatorResults.addAll(contentResults);
}
} else {
logger.info("Skipping Content validation due to: "
+ "validationObjective (" + (validationObjective != null ? validationObjective : "null objective")
+ ") is not relevant or valid for Content validation");
}
} else {
// Explain exactly which precondition(s) caused the skip.
String separator = !isObjectiveAllowingVocabularyValidation && isSchemaErrorInMdhtResults ? " and " : "";
logger.info("Skipping Vocabulary (and thus Content) validation due to: "
+ (isObjectiveAllowingVocabularyValidation ? "" : "validationObjective POSTed: "
+ (validationObjective != null ? validationObjective : "null objective") + separator)
+ (isSchemaErrorInMdhtResults ? "C-CDA Schema error(s) found" : ""));
}
} catch (IOException e) {
throw new RuntimeException("Error getting CCDA contents from provided file", e);
}finally {
closeFileInputStream(ccdaFileInputStream);
}
return validatorResults;
}
项目:gtfs-lib
文件:Entity.java
/**
 * The main entry point into an Entity.Loader. Interprets each row of a CSV file within a zip file as a single
 * GTFS entity, and loads them into a table.
 *
 * Lookup order: the table file at the zip root first; failing that, any entry
 * whose name ends with "tableName.txt" (recorded as a sub-directory error).
 * Missing tables are an error only when the table is required.
 *
 * @param zip the zip file from which to read a table
 */
public void loadTable(ZipFile zip) throws IOException {
ZipEntry entry = zip.getEntry(tableName + ".txt");
if (entry == null) {
Enumeration<? extends ZipEntry> entries = zip.entries();
// check if table is contained within sub-directory
while (entries.hasMoreElements()) {
ZipEntry e = entries.nextElement();
if (e.getName().endsWith(tableName + ".txt")) {
entry = e;
feed.errors.add(new TableInSubdirectoryError(tableName, entry.getName().replace(tableName + ".txt", "")));
}
}
/* This GTFS table did not exist in the zip. */
if (this.isRequired()) {
feed.errors.add(new MissingTableError(tableName));
} else {
LOG.info("Table {} was missing but it is not required.", tableName);
}
if (entry == null) return;
}
LOG.info("Loading GTFS table {} from {}", tableName, entry);
InputStream zis = zip.getInputStream(entry);
// skip any byte order mark that may be present. Files must be UTF-8,
// but the GTFS spec says that "files that include the UTF byte order mark are acceptable"
InputStream bis = new BOMInputStream(zis);
CsvReader reader = new CsvReader(bis, ',', Charset.forName("UTF8"));
this.reader = reader;
boolean hasHeaders = reader.readHeaders();
if (!hasHeaders) {
feed.errors.add(new EmptyTableError(tableName));
}
while (reader.readRecord()) {
// reader.getCurrentRecord() is zero-based and does not include the header line, keep our own row count
if (++row % 500000 == 0) {
LOG.info("Record number {}", human(row));
}
loadOneRow(); // Call subclass method to produce an entity from the current row.
}
// Headers alone (no data rows) also count as an empty table.
if (row == 0) {
feed.errors.add(new EmptyTableError(tableName));
}
}
项目:eMonocot
文件:BOMIgnoringBufferedReaderFactory.java
/**
 * Creates a reader over the resource in the given encoding, silently
 * discarding any leading byte-order mark.
 */
@Override
public BufferedReader create(Resource resource, String encoding)
        throws UnsupportedEncodingException, IOException {
    InputStreamReader decoded = new InputStreamReader(
            new BOMInputStream(resource.getInputStream()), encoding);
    return new BufferedReader(decoded);
}
项目:powop
文件:BOMIgnoringBufferedReaderFactory.java
/**
 * Opens the resource for reading in {@code encoding}; a leading BOM is
 * dropped so it never appears as the first character.
 */
@Override
public BufferedReader create(Resource resource, String encoding)
        throws UnsupportedEncodingException, IOException {
    BOMInputStream bomFree = new BOMInputStream(resource.getInputStream());
    return new BufferedReader(new InputStreamReader(bomFree, encoding));
}
项目:spring-usc
文件:EncodingDetector.java
/**
 * Wraps {@code is} in a reader using {@code encoding}, removing any BOM first.
 */
public static InputStreamReader getInputStreamReader(InputStream is, String encoding) throws IOException {
    logger.debug("Reading stream: using encoding: " + encoding);
    // BOMInputStream strips the byte-order mark before decoding begins.
    return new InputStreamReader(new BOMInputStream(is), encoding);
}