Search This Blog

Thursday, February 25, 2016

Java File content detector - Apache Tika

1. Rename sample.txt so that it has a misleading extension, e.g. sample.pdf (the code below uses sample.exe).

2. Add tika-core-1.11.jar to your project's lib folder so that it is on the classpath.

3. Run the Java class below.
    Output : text/plain File is of type - .txt


import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;

public class TikaExample
{

    public static void main ( String [] args ) throws IOException, MimeTypeException
    {

      TikaConfig config = TikaConfig.getDefaultConfig ();

        // renamed sample.txt to sample.pdf
        File file = new File ( "D:/drvijay/Reliance/testing/sample.exe" );
        InputStream stream = new FileInputStream ( file );
        // whatever the source you get inputstream, please add buffer for mark/reset support,
        // else u may get exception
        InputStream bufferedIn = new BufferedInputStream ( stream );

        final Metadata metadata = new Metadata ();
        metadata.set ( Metadata.RESOURCE_NAME_KEY, file.getName () );
        MediaType mediaType = config.getMimeRepository ().detect ( bufferedIn, metadata );
        // MediaType mediaType = config.getMimeRepository ().detect ( bufferedIn, new Metadata() );
        MimeType mimeType = config.getMimeRepository ().forName ( mediaType.toString () );
        String extension = mimeType.getExtension ();

        System.out.println ( mimeType + " File is of type - " + extension );
    }
}

No comments:

Hit Counter


View My Stats