Changeset 7623
- Timestamp:
- Mar 7, 2019, 10:55:50 AM (5 years ago)
- Location:
- trunk/src/core/net/sf/basedb
- Files:
-
- 4 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/core/net/sf/basedb/core/File.java
r7573 r7623 32 32 import net.sf.basedb.core.data.MimeTypeData; 33 33 import net.sf.basedb.core.hibernate.TypeWrapper; 34 import net.sf.basedb.util.charset.CharsetDetector; 34 35 import net.sf.basedb.util.EqualsHelper; 35 36 import net.sf.basedb.util.FileUtil; … … 51 52 import java.io.InputStream; 52 53 import java.io.OutputStream; 54 import java.io.PipedInputStream; 55 import java.io.PipedOutputStream; 53 56 import java.net.URI; 54 57 import java.net.URISyntaxException; … … 1409 1412 newInternalFile = getNewFile(compress); 1410 1413 } 1414 String mimeType = getMimeType(); 1415 String charset = getCharacterSet(); 1416 // If the file is a text file and no charset has been detected we try UTF-8 detection 1417 boolean utf8Detect = mimeType != null && mimeType.startsWith("text/") && charset == null; 1411 1418 try 1412 1419 { 1413 uploadStream = new UploadStream(newInternalFile, true, checkMd5, compress );1420 uploadStream = new UploadStream(newInternalFile, true, checkMd5, compress, utf8Detect); 1414 1421 } 1415 1422 catch (IOException ex) … … 1689 1696 private boolean checkMd5; 1690 1697 1698 private CharsetTesterThread utf8Tester; 1699 1691 1700 /** 1692 1701 Create a new <code>UploadStream</code> and connect it to the … … 1697 1706 previously stored md5 sum, useful for making sure secondary storage is 1698 1707 working properly 1708 @param utf8Detect If set, we try to parse the file as UTF-8. 
If this works we set the 1709 encoding to UTF-8 1699 1710 @throws IOException If there is an error during the upload 1700 1711 */ 1701 private UploadStream(java.io.File file, boolean calculateMd5, boolean checkMd5, boolean compress )1712 private UploadStream(java.io.File file, boolean calculateMd5, boolean checkMd5, boolean compress, boolean utf8Detect) 1702 1713 throws IOException 1703 1714 { … … 1712 1723 } 1713 1724 closed = false; 1725 if (utf8Detect) 1726 { 1727 // Start a thread for parallel testing if the file can be parsed as UTF-8 1728 utf8Tester = new CharsetTesterThread(Charset.forName("UTF-8")); 1729 new Thread(utf8Tester).start(); 1730 } 1714 1731 } 1715 1732 … … 1721 1738 if (md5 != null) md5.update(b, off, len); 1722 1739 super.write(b, off, len); 1723 } 1724 1740 if (utf8Tester != null) utf8Tester.write(b, off, len); 1741 } 1742 1725 1743 @Override 1726 1744 public void write(byte[] b) … … 1744 1762 if (md5 != null) md5.update((byte)b); 1745 1763 super.write(b); 1764 if (utf8Tester != null) utf8Tester.write(b); 1746 1765 } 1747 1766 … … 1759 1778 return; 1760 1779 } 1780 String charset = null; 1781 if (utf8Tester != null) 1782 { 1783 utf8Tester.close(); 1784 if (utf8Tester.couldParse()) 1785 { 1786 charset = utf8Tester.getCharset().name(); 1787 } 1788 } 1761 1789 super.close(); 1762 1790 FileData data = getData(); … … 1767 1795 data.setRemovedBy(null); 1768 1796 data.setUrl(null); 1797 if (charset != null && data.getCharacterSet() == null) 1798 { 1799 data.setCharacterSet(charset); 1800 } 1769 1801 if (md5 != null) 1770 1802 { … … 1783 1815 } 1784 1816 } 1817 1818 class CharsetTesterThread 1819 implements Runnable 1820 { 1821 private final PipedOutputStream pop; 1822 private final PipedInputStream pip; 1823 private final CharsetDetector detector; 1824 1825 private Thread thread; 1826 // These variables are used by both threads 1827 private volatile boolean couldParse; 1828 private volatile boolean isRunning; 1829 1830 CharsetTesterThread(Charset 
charset) 1831 throws IOException 1832 { 1833 this.detector = new CharsetDetector(charset); 1834 this.pop = new PipedOutputStream(); 1835 this.pip = new PipedInputStream(pop, 4096); 1836 this.isRunning = true; 1837 } 1838 1839 /** 1840 Runs in a separate thread. 1841 */ 1842 @Override 1843 public void run() 1844 { 1845 thread = Thread.currentThread(); 1846 couldParse = detector.testIt(pip); 1847 isRunning = false; 1848 } 1849 1850 public Charset getCharset() 1851 { 1852 return detector.getCharset(); 1853 } 1854 1855 /** 1856 Returns TRUE if the file seems like valid (UTF-8). Note 1857 that this may not be correct until the close() method has 1858 been called. 1859 */ 1860 public boolean couldParse() 1861 { 1862 return couldParse; 1863 } 1864 1865 /** 1866 Runs in the main thread. Copy bytes to charset detector via 1867 the piped streams. 1868 */ 1869 public void write(byte[] b, int off, int len) 1870 { 1871 if (isRunning) 1872 { 1873 try 1874 { 1875 pop.write(b, off, len); 1876 } 1877 catch (IOException ex) 1878 { 1879 isRunning = false; 1880 } 1881 } 1882 } 1883 1884 /** 1885 Runs in the main thread. Copy bytes to charset detector via 1886 the piped streams. 1887 */ 1888 public void write(int b) 1889 { 1890 if (isRunning) 1891 { 1892 try 1893 { 1894 pop.write(b); 1895 } 1896 catch (IOException ex) 1897 { 1898 isRunning = false; 1899 } 1900 } 1901 } 1902 1903 /** 1904 The file has been uploaded. Wait for the charset detector 1905 thread to finish and then close everything. 1906 */ 1907 public void close() 1908 { 1909 try 1910 { 1911 // Flush and close the pipe and... 1912 pop.flush(); 1913 FileUtil.close(pop); 1914 if (isRunning && thread != null) 1915 { 1916 // ... wait for the reading thread to terminate 1917 thread.join(); 1918 } 1919 } 1920 catch (IOException | InterruptedException ex) 1921 {} 1922 finally 1923 { 1924 thread = null; 1925 FileUtil.close(pop); 1926 FileUtil.close(pip); 1927 } 1928 } 1929 } 1930 1785 1931 }
Note: See TracChangeset
for help on using the changeset viewer.