Python magic 模块，from_file() 实例源码

我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用magic.from_file()。

项目：S4 作者：MichaelAquilina | 项目源码 | 文件源码

def _load_index(self):
        """Load the JSON index stored at ``self.index_path()``.

        Returns an empty dict when the index file does not exist. Otherwise
        the file's MIME type (as reported by libmagic) selects how it is
        opened: plain text through the builtin ``open`` and gzip through
        ``gzip.open``. Any other MIME type raises ``ValueError``.
        """
        path = self.index_path()
        if not os.path.exists(path):
            return {}

        mime = magic.from_file(path, mime=True)
        if mime in ('application/gzip', 'application/x-gzip'):
            logger.debug('Detected gzip encoding for reading index')
            opener = gzip.open
        elif mime == 'text/plain':
            logger.debug('Detected plaintext encoding for reading index')
            opener = open
        else:
            raise ValueError('Index is of unknown type', mime)

        # Both openers are used in text mode so json.load receives str.
        with opener(path, 'rt') as handle:
            return json.load(handle)

项目：refextract 作者：inspirehep | 项目源码 | 文件源码

def get_plaintext_document_body(fpath, keep_layout=False):
    """Given a file-path to a full-text, return a list of unicode strings
       whereby each string is a line of the fulltext.
       In the case of a plain-text document, this simply means reading the
       contents in from the file (each line keeps its line terminator).
       In the case of a PDF however, this means converting the document to
       plaintext.
       It raises UnknownDocumentTypeError if the document is not a PDF or
       plain text.
       @param fpath: (string) - the path to the fulltext file
       @param keep_layout: (boolean) - passed through unchanged to
        convert_PDF_to_plaintext; not used for plain-text input.
       @return: (list) of strings - each string being a line in the document.
    """
    mime_type = magic.from_file(fpath, mime=True)

    if mime_type == "text/plain":
        # Read raw bytes and decode explicitly. Opening in text mode and then
        # calling .decode() only works on Python 2; on Python 3 text-mode
        # lines are already str and have no .decode().
        with open(fpath, "rb") as f:
            return [line.decode("utf-8") for line in f]

    if mime_type == "application/pdf":
        return convert_PDF_to_plaintext(fpath, keep_layout)

    raise UnknownDocumentTypeError(mime_type)

项目：malgazer 作者：keithjjones | 项目源码 | 文件源码

def __init__(self, filename):
        """
        Set up the file object for one malware sample.

        The members are first reset to their defaults, then the libmagic
        description of the file is stored in ``self.filetype`` and the
        ``_read_file`` / ``_parse_file_type`` steps are run.

        :param filename:  The file name of the available malware sample.
        :raises ValueError:  If ``filename`` does not exist on disk.
        """
        if not os.path.exists(filename):
            raise ValueError("File {0} does not exist!".format(filename))

        # Defaults for the members; this method itself assigns nothing
        # further to them.
        self.running_entropy_data = None
        self.running_entropy_window_size = 0
        self.file_size = 0
        self.parsedfile = None

        # Sample identity and content.
        self.filename = filename
        self.data = []
        self.filetype = magic.from_file(self.filename)

        self._read_file()
        self._parse_file_type()

项目：Snakepit 作者：K4lium | 项目源码 | 文件源码

def get_type(self):
        """Return a description of the type of the file at ``self.path``.

        Three strategies are tried in order, each one only if the previous
        one raised:

        1. the ``magic.open`` / ``load`` / ``file`` API,
        2. ``magic.from_file``,
        3. the external ``file -b`` command (its stripped stdout is returned
           exactly as read from the pipe).

        Returns an empty string when all three fail.
        """
        ms = None
        try:
            ms = magic.open(magic.MAGIC_NONE)
            ms.load()
            file_type = ms.file(self.path)
        except Exception:
            # `except Exception` instead of a bare `except` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            try:
                file_type = magic.from_file(self.path)
            except Exception:
                try:
                    import subprocess
                    file_process = subprocess.Popen(['file', '-b', self.path], stdout=subprocess.PIPE)
                    # communicate() reads stdout to EOF and waits for the
                    # child, so no zombie process or open pipe is left.
                    file_type = file_process.communicate()[0].strip()
                except Exception:
                    return ''
        finally:
            # Only close the handle if magic.open() actually produced one.
            if ms is not None:
                try:
                    ms.close()
                except Exception:
                    pass

        return file_type

项目：open-wob-api 作者：openstate | 项目源码 | 文件源码

def file_parser(fname, pages=None):
    """Extract the text of ``fname`` as UTF-8 encoded bytes.

    Files that libmagic reports as ``application/pdf`` are walked with
    ``pdf.Document`` and the encoded text of every innermost item is
    collected, joined with newlines. All other files go through
    ``parser.from_file`` and its ``'content'`` entry is encoded.

    :param fname: path of the file to parse.
    :param pages: stop after this many PDF pages; ``None`` processes every
        page. Ignored for non-PDF files.
    :return: the encoded text, or ``None`` when parsing raised (the
        exception is printed, not propagated).
    """
    if magic.from_file(fname, mime=True) == 'application/pdf':
        try:
            text_array = []
            # Initialised so that a document with no pages reports
            # "0 pages" instead of failing on an unbound loop variable.
            i = 0
            d = pdf.Document(fname)
            for i, p in enumerate(d, start=1):
                for f in p:
                    for b in f:
                        for l in b:
                            text_array.append(l.text.encode('UTF-8'))

                if i == pages:  # break after x pages
                    break

            # Single pre-formatted argument: same output from the Python 2
            # print statement and the Python 3 print function.
            print("Processed %i pages" % (i))
            # The collected items are encoded bytes, so join with a bytes
            # separator (identical to '\n' on Python 2).
            return b'\n'.join(text_array)
        except Exception as e:
            print("PDF Parser Exception:  %s" % (e,))
    else:
        try:
            content = parser.from_file(fname)['content']
            return (content or '').encode('UTF-8')
        except Exception as e:
            print("File Parser Exception:  %s" % (e,))

项目：ehForwarderBot 作者：blueset | 项目源码 | 文件源码

def save_file(self, msg, msg_type):
        """Store the payload of ``msg`` on disk and return ``(path, mime)``.

        The file is first written under ``storage/<channel_id>/`` with a name
        built from the message type, ``msg['NewMsgId']`` and the current
        time. ``msg['Text']`` is called with that path (presumably it writes
        the payload there -- confirm against the caller). The MIME type is
        then detected with libmagic and the file is renamed to carry a
        matching extension (``.unknown`` when none can be guessed).
        """
        storage_dir = os.path.join("storage", self.channel_id)
        if not os.path.exists(storage_dir):
            os.makedirs(storage_dir)

        base_name = "%s_%s_%s" % (msg_type, msg['NewMsgId'], int(time.time()))
        fullpath = os.path.join(storage_dir, base_name)
        msg['Text'](fullpath)

        mime = magic.from_file(fullpath, mime=True)
        if isinstance(mime, bytes):
            mime = mime.decode()

        guessed = mimetypes.guess_extension(mime)
        guess_ext = guessed if guessed else ".unknown"
        if guess_ext == ".unknown":
            self.logger.warning("File %s with mime %s has no matching extensions.", fullpath, mime)

        # JPEG always gets ".jpeg", whatever mimetypes guessed for it.
        ext = guess_ext
        if mime == "image/jpeg":
            ext = ".jpeg"

        renamed = "%s%s" % (fullpath, ext)
        os.rename(fullpath, renamed)
        fullpath = renamed
        self.logger.info("File saved from WeChat\nFull path: %s\nMIME: %s", fullpath, mime)
        return fullpath, mime

项目：gibbersense 作者：smxlabs | 项目源码 | 文件源码

def file_magic(in_file):
    """Print the libmagic description of ``in_file`` to standard output."""
    # One pre-formatted argument produces the same text under the Python 2
    # print statement and the Python 3 print function; the former
    # `print "...", value` form is a SyntaxError on Python 3.
    print("\n\t\tFile Type : %s" % (magic.from_file(in_file),))

项目：polichombr 作者：ANSSI-FR | 项目源码 | 文件源码

def do_sample_type_detect(datafile):
        """
            Detect the type of ``datafile`` with libmagic.

            Returns a ``(mime_type, description)`` tuple: the first element
            comes from ``magic.from_file(..., mime=True)``, the second from
            the plain ``magic.from_file`` call.
        """
        # Tuple elements are evaluated left to right: MIME lookup first.
        return (
            magic.from_file(datafile, mime=True),
            magic.from_file(datafile),
        )

项目：csirtg-smrt-py 作者：csirtgadgets | 项目源码 | 文件源码

def _process_cache(self, split="\n", rstrip=True):
        """Yield the lines of the cached file at ``self.cache``.

        The MIME type is detected with ``magic.from_file``; if that raises
        ``AttributeError`` the ``magic.open(magic.MAGIC_MIME)`` API is tried
        instead. Gzip and zip caches are delegated to the matching
        ``csirtg_smrt.decoders`` ``get_lines`` helper; anything else is read
        as a text file, line by line.

        :param split: separator forwarded to the gzip/zip ``get_lines``
            helpers; not used for plain files.
        :param rstrip: accepted for interface compatibility; this method
            does not use it.
        :raises RuntimeError: when neither magic API is available.
        """
        try:
            ftype = magic.from_file(self.cache, mime=True)
        except AttributeError:
            try:
                mag = magic.open(magic.MAGIC_MIME)
                mag.load()
                ftype = mag.file(self.cache)
            except AttributeError:
                raise RuntimeError('unable to detect cached file type')

        if PYVERSION < 3:
            ftype = ftype.decode('utf-8')

        if ftype.startswith(('application/x-gzip', 'application/gzip')):
            from csirtg_smrt.decoders.zgzip import get_lines
            for l in get_lines(self.cache, split=split):
                yield l

            return

        # Prefix match, like the gzip check above: the MAGIC_MIME fallback
        # reports the type followed by a "; charset=..." suffix, which an
        # exact comparison would never match.
        if ftype.startswith("application/zip"):
            from csirtg_smrt.decoders.zzip import get_lines
            for l in get_lines(self.cache, split=split):
                yield l

            return

        # all others, mostly txt, etc...
        with open(self.cache) as f:
            for l in f:
                yield l

项目：csirtg-smrt-py 作者：csirtgadgets | 项目源码 | 文件源码

def get_mimetype(f):
    """Return the MIME type libmagic reports for the file at path ``f``.

    ``magic.from_file`` is tried first; if it raises ``AttributeError`` the
    ``magic.open(magic.MAGIC_MIME)`` API is used instead. When that raises
    ``AttributeError`` as well, ``RuntimeError`` is raised. Under
    ``PYVERSION < 3`` the result is decoded from UTF-8 before returning.
    """
    try:
        detected = magic.from_file(f, mime=True)
    except AttributeError:
        try:
            cookie = magic.open(magic.MAGIC_MIME)
            cookie.load()
            detected = cookie.file(f)
        except AttributeError:
            raise RuntimeError('unable to detect cached file type')

    return detected.decode('utf-8') if PYVERSION < 3 else detected

项目：do-portal 作者：certeu | 项目源码 | 文件源码

def preprocess(sample):
    """Preprocess files after upload.

    If the uploaded sample is a zip archive (and its MIME type is not in
    ``skip_mimes``), every member is extracted, written to the samples
    directory under its SHA-256 (unless a file with that name already
    exists) and recorded as a child :class:`~app.models.Sample` of
    ``sample``.

    :param sample: :class:`~app.models.Sample`
    :return: None
    """
    cfg = current_app.config
    archive_path = os.path.join(cfg['APP_UPLOADS_SAMPLES'], sample.sha256)
    if not zipfile.is_zipfile(archive_path):
        return None

    mt = magic.from_file(archive_path, mime=True)
    if mt in skip_mimes:
        return None

    current_app.log.debug('Extracting {}'.format(archive_path))
    # Context manager so the archive handle is closed even on error.
    with zipfile.ZipFile(archive_path) as zfile:
        for zipfo in zfile.namelist():
            if zfile.getinfo(zipfo).compress_type == 99:  # PK compat. v5.1
                pwd = '-p{}'.format(cfg['INFECTED_PASSWD'])
                # Always hand 7z the archive itself. The path is kept in its
                # own variable because the per-member destination below must
                # not overwrite it between iterations.
                # NOTE(review): no member name is passed to 7z, so this
                # presumably streams every member to stdout -- confirm.
                with popen('7z', 'e', '-so', pwd, archive_path) as zproc:
                    buf, _ = zproc.communicate()
            else:
                buf = zfile.read(zipfo,
                                 pwd=bytes(cfg['INFECTED_PASSWD'], 'utf-8'))
            digests = get_hashes(buf)
            member_path = os.path.join(cfg['APP_UPLOADS_SAMPLES'],
                                       digests.sha256)
            if not os.path.isfile(member_path):
                with open(member_path, 'wb') as wf:
                    wf.write(buf)
            s = Sample(user_id=sample.user_id, filename=zipfo,
                       parent_id=sample.id,
                       md5=digests.md5, sha1=digests.sha1,
                       sha256=digests.sha256, sha512=digests.sha512,
                       ctph=digests.ctph)
            db.session.add(s)
            db.session.commit()

项目：style50 作者：cs50 | 项目源码 | 文件源码

def _check(self, file):
        """
        Select the checker for `file` and return the result of calling it
        with the file's text.

        The checker is looked up in `self.extension_map` by the file's
        extension (without the dot). When the extension is not a key there,
        the libmagic description of the file is searched for any key of
        `self.magic_map`. Raises Error if the file is missing, no checker
        matches, or the file cannot be decoded as text.
        """
        if not os.path.exists(file):
            raise Error("file \"{}\" not found".format(file))

        suffix = os.path.splitext(file)[1][1:]
        try:
            check = self.extension_map[suffix]
        except KeyError:
            description = magic.from_file(file)
            for fragment, checker in self.magic_map.items():
                if fragment in description:
                    check = checker
                    break
            else:
                raise Error("unknown file type \"{}\", skipping...".format(file))

        try:
            with open(file) as source:
                code = source.read()
        except UnicodeDecodeError:
            raise Error("file does not seem to contain text, skipping...")

        # Ensure we don't warn about adding trailing newline; an empty file
        # is passed through untouched.
        if code and not code.endswith('\n'):
            code += '\n'

        return check(code)

项目：oclubs 作者：SHSIDers | 项目源码 | 文件源码

def handle(cls, user, club, file):
        """Accept an uploaded file and create the object that records it.

        The upload is saved under ``/tmp`` with a random hex name, its MIME
        type is detected from the content, and ``cls._thumb`` writes the
        permanent copy to the location given by ``cls.mk_internal_path``.
        The temporary file is always removed. ``UploadNotSupported`` is
        raised when the MIME type is not a key of ``cls._mimedict``.

        Returns the result of ``create()`` on the new object.
        """
        filename = os.urandom(8).encode('hex')
        staging_path = os.path.join('/tmp', filename)
        file.save(staging_path)

        try:
            # Don't use mimetypes.guess_type(staging_path) -- Faked extensions
            mime = magic.from_file(staging_path, mime=True)
            if mime not in cls._mimedict:
                raise UploadNotSupported

            filename += cls._mimedict[mime]
            final_path = cls.mk_internal_path(filename)
            final_dir = os.path.dirname(final_path)
            if not os.path.isdir(final_dir):
                os.makedirs(final_dir, 0o755)

            # resize to 600, 450
            cls._thumb(staging_path, final_path)
            fs.watch(final_path)
        finally:
            os.remove(staging_path)

        record = cls.new()
        record.club = club
        record.uploader = user
        record._location = filename
        record.mime = mime
        return record.create()

项目：validatemyfile 作者：daisieh | 项目源码 | 文件源码

def check(filepath):
    """Return True if libmagic's MIME type for ``filepath`` starts with
    ``application/pdf``, False otherwise."""
    mime = magic.from_file(filepath, mime=True)
    return re.match('application/pdf', mime) is not None

项目：guest-images 作者：S2E | 项目源码 | 文件源码

def get_magic(filename):
    """Return the libmagic description of ``filename``.

    Uses the module-level ``g_m`` object's ``file`` method when ``g_m`` is
    truthy, and falls back to ``magic.from_file`` otherwise.
    """
    if not g_m:
        return magic.from_file(filename)
    return g_m.file(filename)

项目：PeekabooAV 作者：scVENUS | 项目源码 | 文件源码

def guess_mime_type_from_file_contents(file_path):
    """Return the MIME type libmagic derives from the file's content.

    Returns None when libmagic's answer is empty or otherwise falsy.
    """
    detected = magic.from_file(file_path, mime=True)
    return detected if detected else None

项目：fame 作者：certsocietegenerale | 项目源码 | 文件源码

def _compute_default_properties(self):
        """Fill in the default entries for the file at ``self['filepath']``.

        Sets ``names`` (the base name of the path), ``detailed_type`` and
        ``mime`` (both from libmagic), an empty ``analysis`` list and an
        ``antivirus`` mapping with one ``False`` entry per antivirus module
        known to the dispatcher, then calls ``self._set_type()``.
        """
        path = self['filepath']

        self['names'] = [os.path.basename(path)]
        self['detailed_type'] = magic.from_file(path)
        self['mime'] = magic.from_file(path, mime=True)
        self['analysis'] = []

        # Init antivirus status: every module starts out as False.
        self['antivirus'] = {
            module.name: False
            for module in dispatcher.get_antivirus_modules()
        }

        self._set_type()

    # Convert mime/types into clearer type