/**
 * Resolves the textual identifier {@code id} to an object of the requested type.
 * <p>
 * Resolution order:
 * <ol>
 * <li>literal conversions for String, Character, Byte, Short, Integer, Long, Float,
 *     Double and Boolean (matched either by the Java wrapper class or by the
 *     corresponding Hibernate type class);</li>
 * <li>the entity registered in {@code iEntities} under {@code clazz.getName()} and {@code id};</li>
 * <li>any registered entity with this {@code id} whose object is an instance of {@code clazz};</li>
 * <li>for {@code Session}: the object of the first entity registered under the Session class name;</li>
 * <li>for {@code Student}: the {@code iStudents} entry, passed through {@code checkUnknown};</li>
 * <li>when {@code iIsClone} is set: a lookup through {@code iHibSession}, passed through
 *     {@code checkUnknown};</li>
 * <li>otherwise {@code checkUnknown(clazz, id, null)}.</li>
 * </ol>
 *
 * @param clazz requested type
 * @param id textual identifier (or literal value for the simple types)
 * @return the resolved object; {@code null} for an empty or {@code null} character literal
 * @throws NumberFormatException if {@code id} cannot be parsed as the requested numeric type
 */
protected Object get(Class clazz, String id) {
    // --- simple values: the id itself carries the value ---
    if (clazz.equals(String.class) || clazz.equals(StringType.class)) {
        return id;
    }
    if (clazz.equals(Character.class) || clazz.equals(CharacterType.class)) {
        if (id == null || id.isEmpty()) {
            return null;
        }
        return id.charAt(0);
    }
    if (clazz.equals(Byte.class) || clazz.equals(ByteType.class)) {
        return Byte.valueOf(id);
    }
    if (clazz.equals(Short.class) || clazz.equals(ShortType.class)) {
        return Short.valueOf(id);
    }
    if (clazz.equals(Integer.class) || clazz.equals(IntegerType.class)) {
        return Integer.valueOf(id);
    }
    if (clazz.equals(Long.class) || clazz.equals(LongType.class)) {
        return Long.valueOf(id);
    }
    if (clazz.equals(Float.class) || clazz.equals(FloatType.class)) {
        return Float.valueOf(id);
    }
    if (clazz.equals(Double.class) || clazz.equals(DoubleType.class)) {
        return Double.valueOf(id);
    }
    if (clazz.equals(Boolean.class) || clazz.equals(BooleanType.class)) {
        return Boolean.valueOf(id);
    }

    // --- exact match: entity registered under the requested class name ---
    Map<String, Entity> registered = iEntities.get(clazz.getName());
    Entity direct = (registered == null ? null : registered.get(id));
    if (direct != null) {
        return direct.getObject();
    }

    // --- compatible match: same id registered under another class name ---
    for (Map<String, Entity> candidates : iEntities.values()) {
        Entity candidate = candidates.get(id);
        if (candidate == null) {
            continue;
        }
        Object value = candidate.getObject();
        if (clazz.isInstance(value)) {
            return value;
        }
    }

    // --- special cases ---
    if (clazz.equals(Session.class)) {
        // Any id resolves to the first registered session entity.
        Map<String, Entity> sessions = iEntities.get(Session.class.getName());
        return sessions.values().iterator().next().getObject();
    }
    if (clazz.equals(Student.class)) {
        return checkUnknown(clazz, id, iStudents.get(id));
    }
    if (iIsClone) {
        // ItypeDesc is looked up by an Integer key, everything else by a Long key.
        Serializable key;
        if (clazz.equals(ItypeDesc.class)) {
            key = Integer.valueOf(id);
        } else {
            key = Long.valueOf(id);
        }
        return checkUnknown(clazz, id, iHibSession.get(clazz, key));
    }
    return checkUnknown(clazz, id, null);
}
/**
 * Registers every expected result column alias, together with its Hibernate
 * type, on the given native query. The registration order is significant for
 * callers that read results positionally, so it must not be rearranged.
 *
 * @param query native SQL query to configure
 */
private void addScalars(SQLQuery query) {
    query
        .addScalar("URI", StringType.INSTANCE)
        .addScalar("foundTimestamp", LongType.INSTANCE)
        .addScalar("canonicalTag", StringType.INSTANCE)
        .addScalar("httpStatusCode", IntegerType.INSTANCE)
        .addScalar("lastCrawledTimestamp", LongType.INSTANCE)
        .addScalar("contentHashcode", StringType.INSTANCE)
        .addScalar("backgroundId", ShortType.INSTANCE)
        .addScalar("qualityScore", ShortType.INSTANCE)
        .addScalar("spamScore", ShortType.INSTANCE)
        .addScalar("readingLevel", ShortType.INSTANCE)
        .addScalar("varietyTopicScore", ShortType.INSTANCE)
        .addScalar("adScripts", ShortType.INSTANCE)
        .addScalar("relevantImages", BooleanType.INSTANCE)
        .addScalar("responseTime", IntegerType.INSTANCE)
        .addScalar("pageSize", IntegerType.INSTANCE)
        .addScalar("pageRank", DoubleType.INSTANCE)
        .addScalar("qwLocale", StringType.INSTANCE)
        .addScalar("onPageText", StringType.INSTANCE)
        .addScalar("title", StringType.INSTANCE)
        .addScalar("metaDescription", StringType.INSTANCE)
        .addScalar("topicKeyword", StringType.INSTANCE)
        .addScalar("robotsIndex", BooleanType.INSTANCE)
        .addScalar("duplicateTitle", BooleanType.INSTANCE)
        .addScalar("duplicateMetaDescription", BooleanType.INSTANCE)
        .addScalar("duplicateContent", BooleanType.INSTANCE)
        .addScalar("trailingSlashIssue", BooleanType.INSTANCE)
        .addScalar("timeout", BooleanType.INSTANCE)
        .addScalar("pagination", BooleanType.INSTANCE)
        .addScalar("dcPrimary", BooleanType.INSTANCE);
}
/**
 * Registers every expected result column alias, together with its Hibernate
 * type, on the given native query. The registration order is significant for
 * callers that read results positionally, so it must not be rearranged.
 *
 * @param query native SQL query to configure
 */
private void addScalars(SQLQuery query) {
    query
        .addScalar("resultFromTimestampMills", LongType.INSTANCE)
        .addScalar("qualityScore", ShortType.INSTANCE)
        .addScalar("totalLinks", LongType.INSTANCE)
        .addScalar("internalURLs", LongType.INSTANCE)
        .addScalar("internalNoindexURLs", LongType.INSTANCE)
        .addScalar("internalIndexURLs", LongType.INSTANCE)
        .addScalar("externalURLs", LongType.INSTANCE)
        .addScalar("externalURLsDifferentDomains", LongType.INSTANCE)
        .addScalar("crawledURLs", LongType.INSTANCE)
        .addScalar("clientErrorURLs", LongType.INSTANCE)
        .addScalar("timeoutURLs", LongType.INSTANCE)
        .addScalar("redirectionURLs", LongType.INSTANCE)
        .addScalar("serverErrorURLs", LongType.INSTANCE)
        .addScalar("avgResponseTimeMills", IntegerType.INSTANCE)
        .addScalar("medianResponseTimeMills", IntegerType.INSTANCE)
        .addScalar("avgPageSize", LongType.INSTANCE)
        .addScalar("totalCountOfRelevantKeywords", LongType.INSTANCE)
        .addScalar("totalCountOfKeywords", LongType.INSTANCE)
        .addScalar("avgReadingLevel", DoubleType.INSTANCE)
        .addScalar("domainBrandName", StringType.INSTANCE)
        .addScalar("homeDocument", StringType.INSTANCE);
}
/**
 * Computes the average {@code readinglevel} over all rows of table
 * {@code URL_<projectDatabaseId>} whose {@code readinglevel} is non-negative.
 * <p>
 * The single result row is mapped onto a {@link ProjectSummary} through the
 * {@code readinglevel} alias. If the query or the mapping fails, or no result
 * is produced, an empty {@code ProjectSummary} is returned instead; the
 * failure is logged together with the exception so it can be diagnosed.
 *
 * @param projectDatabaseId suffix of the per-project URL table; it is
 *        concatenated into the SQL text (identifiers cannot be bound as
 *        parameters), so it must come from a trusted source
 * @return the populated summary, or an empty one on failure; never {@code null}
 */
public ProjectSummary analyzeAvgReadingLevel(String projectDatabaseId) {
    long timebefore = System.currentTimeMillis();
    Session session = null;
    try {
        session = sessionFactory.openSession();

        String sql = "select avg(readinglevel) as readinglevel from URL_" + projectDatabaseId
                + " where readinglevel>=0";
        SQLQuery query = session.createSQLQuery(sql);
        query.addScalar("readinglevel", DoubleType.INSTANCE);

        ProjectSummary readinglevel = null;
        try {
            query.setResultTransformer(Transformers.aliasToBean(ProjectSummary.class));
            readinglevel = (ProjectSummary) query.uniqueResult();
        } catch (Exception e) {
            // Deliberately best-effort: fall back to an empty summary, but keep
            // the failure reason in the log instead of discarding it.
            logger.info("Error calculating analyzeAvgReadingLevel... " + e);
        }

        if (readinglevel == null) {
            readinglevel = new ProjectSummary();
        }

        System.out.println("Duration of analyzeAvgReadingLevel() : " + (System.currentTimeMillis() - timebefore));
        return readinglevel;
    } finally {
        if (session != null) {
            session.close();
        }
    }
}
/**
 * Reports the result type of this function, which is always a double
 * regardless of the type of its first argument.
 *
 * @param firstArgumentType type of the first argument (not consulted)
 * @param mapping mapping context (not consulted)
 * @return the shared Hibernate double type
 * @throws QueryException declared by the overridden method; not thrown here
 */
@Override
public Type getReturnType(Type firstArgumentType, Mapping mapping) throws QueryException {
    // Reuse the shared singleton instead of allocating a new type per call.
    return DoubleType.INSTANCE;
}
/**
 * Reports the result type of this function, which is always a double
 * regardless of the type of its first argument.
 *
 * @param firstArgumentType type of the first argument (not consulted)
 * @param mapping mapping context (not consulted)
 * @return the shared Hibernate double type
 */
@Override
public Type getReturnType(Type firstArgumentType, Mapping mapping) {
    // Reuse the shared singleton instead of allocating a new type per call.
    return DoubleType.INSTANCE;
}
/**
 * Registers every expected result column alias, together with its Hibernate
 * type, on the given native query. The registration order is significant for
 * callers that read results positionally, so it must not be rearranged.
 *
 * @param query native SQL query to configure
 */
protected void addScalars(SQLQuery query) {
    query
        .addScalar("id", StringType.INSTANCE)
        .addScalar("parentId", StringType.INSTANCE)
        .addScalar("partitionkey", IntegerType.INSTANCE)
        .addScalar("URLName", StringType.INSTANCE)
        .addScalar("canonicalTag", StringType.INSTANCE)
        .addScalar("canonicalTagHashcode", StringType.INSTANCE)
        .addScalar("firstFoundAnchorTextToThisURL", StringType.INSTANCE)
        .addScalar("contentHashcode", StringType.INSTANCE)
        .addScalar("depthFromDomainRoot", IntegerType.INSTANCE)
        .addScalar("externalLink", BooleanType.INSTANCE)
        .addScalar("externalHostName", StringType.INSTANCE)
        .addScalar("externalLinksOnThisPage", IntegerType.INSTANCE)
        .addScalar("externalLinksDifferentDomainsOnThisPage", IntegerType.INSTANCE)
        .addScalar("foundAtURL", StringType.INSTANCE)
        .addScalar("foundTimestamp", LongType.INSTANCE)
        .addScalar("h1", StringType.INSTANCE)
        .addScalar("h2", StringType.INSTANCE)
        .addScalar("h3", StringType.INSTANCE)
        .addScalar("httpStatusCode", IntegerType.INSTANCE)
        .addScalar("internalLinksOnThisPage", IntegerType.INSTANCE)
        .addScalar("followLinksToThisPage", LongType.INSTANCE)
        .addScalar("nofollowLinksToThisPage", LongType.INSTANCE)
        .addScalar("metaDescription", StringType.INSTANCE)
        .addScalar("metaRobotsFollow", BooleanType.INSTANCE)
        .addScalar("metaRobotsIndex", BooleanType.INSTANCE)
        .addScalar("pageSize", IntegerType.INSTANCE)
        .addScalar("redirectedToURL", StringType.INSTANCE)
        .addScalar("responseTime", IntegerType.INSTANCE)
        .addScalar("timeout", BooleanType.INSTANCE)
        .addScalar("title", StringType.INSTANCE)
        .addScalar("duplicateContent", BooleanType.INSTANCE)
        .addScalar("duplicateMetaDescription", BooleanType.INSTANCE)
        .addScalar("duplicateTitle", BooleanType.INSTANCE)
        .addScalar("duplicateH1", BooleanType.INSTANCE)
        .addScalar("canonicalTagIssue", BooleanType.INSTANCE)
        .addScalar("headlinesNotInRightOrder", BooleanType.INSTANCE)
        .addScalar("relNofollow", BooleanType.INSTANCE)
        .addScalar("qualityScore", ShortType.INSTANCE)
        .addScalar("facebookLikes", IntegerType.INSTANCE)
        .addScalar("facebookShares", IntegerType.INSTANCE)
        .addScalar("differentURLSameAnchor", BooleanType.INSTANCE)
        .addScalar("googleAnalyticsCodeFound", BooleanType.INSTANCE)
        .addScalar("newPrice", IntegerType.INSTANCE)
        .addScalar("oldPrice", IntegerType.INSTANCE)
        .addScalar("color", StringType.INSTANCE)
        .addScalar("readingLevel", ShortType.INSTANCE)
        .addScalar("varietyTopicScore", ShortType.INSTANCE)
        .addScalar("onPageText", StringType.INSTANCE)
        .addScalar("qwLocale", ShortType.INSTANCE)
        .addScalar("relevantImages", BooleanType.INSTANCE)
        .addScalar("adScripts", ShortType.INSTANCE)
        .addScalar("normalizedText", StringType.INSTANCE)
        .addScalar("pageRank", DoubleType.INSTANCE)
        .addScalar("trailingSlashIssue", BooleanType.INSTANCE)
        .addScalar("gzipIssue", BooleanType.INSTANCE)
        .addScalar("externalLinkPower", ShortType.INSTANCE)
        .addScalar("spamScore", ShortType.INSTANCE)
        .addScalar("backgroundId", ShortType.INSTANCE)
        .addScalar("pagination", BooleanType.INSTANCE)
        .addScalar("dcPrimary", BooleanType.INSTANCE)
        .addScalar("keywordOrientationShortTerm", BooleanType.INSTANCE)
        .addScalar("keywordOrientationTwoTerms", BooleanType.INSTANCE)
        .addScalar("topicKeywordOneTerm", StringType.INSTANCE)
        .addScalar("topicKeywordTwoTerms", StringType.INSTANCE)
        .addScalar("topicKeywordThreeTerms", StringType.INSTANCE)
        .addScalar("normalizedTopicKeywordOneTerm", StringType.INSTANCE)
        .addScalar("normalizedTopicKeywordTwoTerms", StringType.INSTANCE)
        .addScalar("normalizedTopicKeywordThreeTerms", StringType.INSTANCE)
        .addScalar("topicKeywordOneTermWeight", ShortType.INSTANCE)
        .addScalar("topicKeywordTwoTermsWeight", ShortType.INSTANCE)
        .addScalar("topicKeywordThreeTermsWeight", ShortType.INSTANCE)
        .addScalar("changeCode", IntegerType.INSTANCE)
        .addScalar("overwriteFlag", BooleanType.INSTANCE)
        .addScalar("protocolRecord", BooleanType.INSTANCE)
        .addScalar("relevantOnPageText", StringType.INSTANCE);
}
/** * Compute computeExternalLinkPower for external URLs in DB * * @param projectDatabaseId */ public void computeExternalLinkPower(String projectDatabaseId) { long timebefore = System.currentTimeMillis(); Session session = null; try { session = sessionFactory.openSession(); StringBuffer sql = new StringBuffer(); sql.append("select coalesce(max(pagerank),0) AS maxPagerank, coalesce(avg(pagerank),0) AS avgPagerank from url_").append(projectDatabaseId).append(" where pagerank <> 0"); SQLQuery query = session.createSQLQuery(sql.toString()); query.addScalar("maxPagerank", DoubleType.INSTANCE); query.addScalar("avgPagerank", DoubleType.INSTANCE); double maxPagerank = 0; double avgPagerank = 0; @SuppressWarnings("unchecked") List<Object[]> averages = query.list(); sql.setLength(0); if (!averages.isEmpty()) { maxPagerank = (Double) averages.get(0)[0]; avgPagerank = (Double) averages.get(0)[1]; } double max1PRThreshold = 0; if (maxPagerank != 0) { // - 25% max1PRThreshold = (maxPagerank - (double) (maxPagerank / 100d * 25d)); if (max1PRThreshold < 0) max1PRThreshold = 0; } double avg1PRThreshold = 0; if (avgPagerank != 0) { // + 40% avg1PRThreshold = (avgPagerank + (double) (avgPagerank / 100d * 40d)); if (avg1PRThreshold < 0) avg1PRThreshold = 0; } sql.append("create temp table externalsource_").append(projectDatabaseId).append(" as (select urlname,pagerank from url_").append(projectDatabaseId).append(" where urlname in (select foundaturl from url_").append(projectDatabaseId) .append(" where externalLink=true))"); query = session.createSQLQuery(sql.toString()); query.executeUpdate(); sql.setLength(0); sql.append("update url_") .append(projectDatabaseId) .append(" as a set externalLinkPower = a.externalLinkPower + sub.externalLinkPower from (select case when pagerank >= " + max1PRThreshold + " then 50 when pagerank >= " + avg1PRThreshold + " then 35 when pagerank >= " + avgPagerank + " then 15 else 0 end as externalLinkPower,urlname from 
externalsource_").append(projectDatabaseId).append(") as sub where sub.urlname = a.foundaturl AND a.externallink = true AND a.externalLinkPower <> 0"); query = session.createSQLQuery(sql.toString()); query.executeUpdate(); sql.setLength(0); System.out.println("Duration of computeExternalLinkPower() : " + (System.currentTimeMillis() - timebefore) + " max1Threshold: " + max1PRThreshold + " avg1PRThreshold: " + avg1PRThreshold + " avgThreshold: " + avgPagerank); } finally { if (session != null) session.close(); } }