package org.apache.mahout.cf.taste.hadoop.item;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.time.DateUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
import org.apache.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob;
import org.apache.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.math.VarIntWritable;
import org.apache.mahout.math.VarLongWritable;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasures;

/* loaded from: input_file:org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.class */
/**
 * Driver for the distributed item-based recommender.
 *
 * <p>{@link #run(String[])} chains up to four phases, each guarded by
 * {@code shouldRunNextPhase(...)}:
 * <ol>
 *   <li>run {@link PreparePreferenceMatrixJob} on the input and read the number of users it wrote;</li>
 *   <li>run {@link RowSimilarityJob} on the rating matrix and, if requested, export the
 *       resulting similarity matrix as text;</li>
 *   <li>run the "partialMultiply" job joining similarity-matrix rows with split user vectors;</li>
 *   <li>optionally convert the filter file, then run the aggregate-and-recommend job that
 *       writes the final output.</li>
 * </ol>
 *
 * <p>Any phase that reports failure makes {@code run} return {@code -1}.
 */
public final class RecommenderJob extends AbstractJob {
    public static final String BOOLEAN_DATA = "booleanData";
    public static final String DEFAULT_PREPARE_PATH = "preparePreferenceMatrix";

    private static final int DEFAULT_NUM_RECOMMENDATIONS = 10;
    private static final int DEFAULT_MAX_PREFS_PER_USER_CONSIDERED = 10;
    private static final int DEFAULT_MAX_SIMILARITIES_PER_ITEM = 100;
    private static final int DEFAULT_MAX_PREFS = 500;
    private static final int DEFAULT_MIN_PREFS_PER_USER = 1;

    // Sentinels forwarded to RowSimilarityJob when the corresponding option is absent.
    // NOTE(review): presumably interpreted there as "no threshold" / "no fixed seed" - confirm.
    private static final double NO_THRESHOLD = Double.MIN_VALUE;
    private static final long NO_FIXED_RANDOM_SEED = Long.MIN_VALUE;

    /** Heap size (in MB) assumed when no -Xmx setting can be found in the job configuration. */
    private static final int DEFAULT_ASSUMED_HEAP_MB = 512;
    /** Upper bound (in MB) applied to the sort buffer size set by {@link #setIOSort(JobContext)}. */
    private static final int MAX_IO_SORT_MB = 1024;
    /** Task timeout applied to the final job: one hour, in milliseconds. */
    private static final int TASK_TIMEOUT_MILLIS = 60 * 60 * 1000;
    /** Matches a JVM max-heap flag such as {@code -Xmx512m} or {@code -Xmx2G}; compiled once. */
    private static final Pattern MAX_HEAP_PATTERN = Pattern.compile("-Xmx([0-9]+)([mMgG])");

    /**
     * Parses the command line and runs the recommender phases.
     *
     * @param strArr command-line arguments
     * @return {@code 0} on success (or when the last phase is skipped), {@code -1} if the
     *         arguments could not be parsed or any executed job reported failure
     * @throws Exception if option values are malformed or a job throws
     */
    @Override // org.apache.hadoop.util.Tool
    public int run(String[] strArr) throws Exception {
        addOptions();

        Map<String, List<String>> parsedArgs = parseArguments(strArr);
        if (parsedArgs == null) {
            return -1;
        }

        // Read and validate every option up front so that malformed numbers fail before any job starts.
        Path outputPath = getOutputPath();
        int numRecommendations = Integer.parseInt(getOption("numRecommendations"));
        String usersFile = getOption("usersFile");
        String itemsFile = getOption("itemsFile");
        String filterFile = getOption("filterFile");
        String userItemFile = getOption("userItemFile");
        boolean booleanData = Boolean.parseBoolean(getOption(BOOLEAN_DATA));
        int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
        int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
        int maxPrefsInItemSimilarity = Integer.parseInt(getOption("maxPrefsInItemSimilarity"));
        int maxSimilaritiesPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
        String similarityClassname = getOption("similarityClassname");
        double threshold = hasOption(DefaultOptionCreator.THRESHOLD_OPTION)
            ? Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION))
            : NO_THRESHOLD;
        long randomSeed = hasOption(DefaultOptionCreator.RANDOM_SEED)
            ? Long.parseLong(getOption(DefaultOptionCreator.RANDOM_SEED))
            : NO_FIXED_RANDOM_SEED;

        Path prepPath = getTempPath(DEFAULT_PREPARE_PATH);
        Path similarityMatrixPath = getTempPath("similarityMatrix");
        Path explicitFilterPath = getTempPath("explicitFilterPath");
        Path partialMultiplyPath = getTempPath("partialMultiply");

        AtomicInteger currentPhase = new AtomicInteger();
        int numberOfUsers = -1;

        // Phase 1: build the preference matrix.
        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            int prepareStatus = ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(), new String[] {
                "--input", getInputPath().toString(),
                "--output", prepPath.toString(),
                "--minPrefsPerUser", String.valueOf(minPrefsPerUser),
                "--booleanData", String.valueOf(booleanData),
                "--tempDir", getTempPath().toString()
            });
            // Do not continue on top of a failed preparation step.
            if (prepareStatus != 0) {
                return -1;
            }
            numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf());
        }

        // Phase 2: compute item-item similarities.
        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            // Phase 1 did not run in this invocation, so derive the user count from the user vectors.
            if (numberOfUsers == -1) {
                numberOfUsers = (int) HadoopUtil.countRecords(
                    new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS), PathType.LIST, null, getConf());
            }

            int similarityStatus = ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] {
                "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(),
                "--output", similarityMatrixPath.toString(),
                "--numberOfColumns", String.valueOf(numberOfUsers),
                "--similarityClassname", similarityClassname,
                "--maxObservationsPerRow", String.valueOf(maxPrefsInItemSimilarity),
                "--maxObservationsPerColumn", String.valueOf(maxPrefsInItemSimilarity),
                "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem),
                "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE),
                "--threshold", String.valueOf(threshold),
                "--randomSeed", String.valueOf(randomSeed),
                "--tempDir", getTempPath().toString()
            });
            if (similarityStatus != 0) {
                return -1;
            }

            if (hasOption("outputPathForSimilarityMatrix")
                && !writeSimilarityMatrix(similarityMatrixPath, prepPath, maxSimilaritiesPerItem)) {
                return -1;
            }
        }

        // Phase 3: join similarity-matrix rows with the split user vectors.
        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job partialMultiply = new Job(getConf(), "partialMultiply");
            Configuration partialMultiplyConf = partialMultiply.getConfiguration();

            MultipleInputs.addInputPath(partialMultiply, similarityMatrixPath,
                SequenceFileInputFormat.class, SimilarityMatrixRowWrapperMapper.class);
            MultipleInputs.addInputPath(partialMultiply, new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS),
                SequenceFileInputFormat.class, UserVectorSplitterMapper.class);
            partialMultiply.setJarByClass(ToVectorAndPrefReducer.class);
            partialMultiply.setMapOutputKeyClass(VarIntWritable.class);
            partialMultiply.setMapOutputValueClass(VectorOrPrefWritable.class);
            partialMultiply.setReducerClass(ToVectorAndPrefReducer.class);
            partialMultiply.setOutputFormatClass(SequenceFileOutputFormat.class);
            partialMultiply.setOutputKeyClass(VarIntWritable.class);
            partialMultiply.setOutputValueClass(VectorAndPrefsWritable.class);
            partialMultiplyConf.setBoolean("mapred.compress.map.output", true);
            partialMultiplyConf.set("mapred.output.dir", partialMultiplyPath.toString());

            if (usersFile != null) {
                partialMultiplyConf.set("usersFile", usersFile);
            }
            if (userItemFile != null) {
                partialMultiplyConf.set("userItemFile", userItemFile);
            }
            partialMultiplyConf.setInt("maxPrefsPerUserConsidered", maxPrefsPerUser);

            if (!partialMultiply.waitForCompletion(true)) {
                return -1;
            }
        }

        // Phase 4: apply the optional explicit filter and produce the recommendations.
        if (shouldRunNextPhase(parsedArgs, currentPhase)) {
            if (filterFile != null) {
                Job itemFiltering = prepareJob(new Path(filterFile), explicitFilterPath, TextInputFormat.class,
                    ItemFilterMapper.class, VarLongWritable.class, VarLongWritable.class,
                    ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
                    SequenceFileOutputFormat.class);
                if (!itemFiltering.waitForCompletion(true)) {
                    return -1;
                }
            }

            String aggregateAndRecommendInput = partialMultiplyPath.toString();
            if (filterFile != null) {
                aggregateAndRecommendInput = aggregateAndRecommendInput + "," + explicitFilterPath;
            }

            Job aggregateAndRecommend = prepareJob(new Path(aggregateAndRecommendInput), outputPath,
                SequenceFileInputFormat.class, PartialMultiplyMapper.class, VarLongWritable.class,
                PrefAndSimilarityColumnWritable.class, AggregateAndRecommendReducer.class, VarLongWritable.class,
                RecommendedItemsWritable.class,
                parsedArgs.containsKey("--sequencefileOutput") ? SequenceFileOutputFormat.class : TextOutputFormat.class);
            Configuration aggregateAndRecommendConf = aggregateAndRecommend.getConfiguration();

            if (itemsFile != null) {
                aggregateAndRecommendConf.set("itemsFile", itemsFile);
            }
            if (userItemFile != null) {
                aggregateAndRecommendConf.set("userItemFile", userItemFile);
            }
            if (filterFile != null) {
                // Re-registers both inputs on the job after prepareJob() was given the comma-joined path.
                setS3SafeCombinedInputPath(aggregateAndRecommend, getTempPath(), partialMultiplyPath,
                    explicitFilterPath);
            }
            setIOSort(aggregateAndRecommend);
            aggregateAndRecommendConf.set("itemIDIndexPath",
                new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
            aggregateAndRecommendConf.setInt("numRecommendations", numRecommendations);
            aggregateAndRecommendConf.setBoolean(BOOLEAN_DATA, booleanData);

            if (!aggregateAndRecommend.waitForCompletion(true)) {
                return -1;
            }
        }

        return 0;
    }

    /** Registers every command-line option understood by this job. */
    private void addOptions() {
        addInputOption();
        addOutputOption();
        addOption("numRecommendations", "n", "Number of recommendations per user",
            String.valueOf(DEFAULT_NUM_RECOMMENDATIONS));
        addOption("usersFile", null, "File of users to recommend for", null);
        addOption("itemsFile", null, "File of items to recommend for", null);
        addOption("filterFile", "f", "File containing comma-separated userID,itemID pairs. Used to exclude the item from "
            + "the recommendations for that user (optional)", null);
        addOption("userItemFile", "uif", "File containing comma-separated userID,itemID pairs (optional). "
            + "Used to include only these items into recommendations. "
            + "Cannot be used together with usersFile or itemsFile", null);
        addOption(BOOLEAN_DATA, "b", "Treat input as without pref values", Boolean.FALSE.toString());
        addOption("maxPrefsPerUser", "mxp",
            "Maximum number of preferences considered per user in final recommendation phase",
            String.valueOf(DEFAULT_MAX_PREFS_PER_USER_CONSIDERED));
        addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this in the similarity computation "
            + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
        addOption("maxSimilaritiesPerItem", "m", "Maximum number of similarities considered per item ",
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ITEM));
        addOption("maxPrefsInItemSimilarity", "mpiis", "max number of preferences to consider per user or item in the "
            + "item similarity computation phase, users or items with more preferences will be sampled down (default: "
            + DEFAULT_MAX_PREFS + ')', String.valueOf(DEFAULT_MAX_PREFS));
        addOption("similarityClassname", "s", "Name of distributed similarity measures class to instantiate, "
            + "alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')', true);
        addOption(DefaultOptionCreator.THRESHOLD_OPTION, "tr", "discard item pairs with a similarity value below this",
            false);
        addOption("outputPathForSimilarityMatrix", "opfsm", "write the item similarity matrix to this path (optional)",
            false);
        addOption(DefaultOptionCreator.RANDOM_SEED, null, "use this seed for sampling", false);
        addFlag("sequencefileOutput", null, "write the output into a SequenceFile instead of a text file");
    }

    /**
     * Runs the job that exports the similarity matrix as text to the path given by the
     * {@code outputPathForSimilarityMatrix} option.
     *
     * @param similarityMatrixPath location of the similarity matrix written by phase 2
     * @param prepPath output directory of the preparation phase (holds the item ID index)
     * @param maxSimilaritiesPerItem value passed to the job as its per-item similarity limit
     * @return {@code true} if the job completed successfully
     */
    private boolean writeSimilarityMatrix(Path similarityMatrixPath, Path prepPath, int maxSimilaritiesPerItem)
        throws IOException, InterruptedException, ClassNotFoundException {
        Path exportPath = new Path(getOption("outputPathForSimilarityMatrix"));
        Job outputSimilarityMatrix = prepareJob(similarityMatrixPath, exportPath, SequenceFileInputFormat.class,
            ItemSimilarityJob.MostSimilarItemPairsMapper.class, EntityEntityWritable.class, DoubleWritable.class,
            ItemSimilarityJob.MostSimilarItemPairsReducer.class, EntityEntityWritable.class, DoubleWritable.class,
            TextOutputFormat.class);
        Configuration mostSimilarItemsConf = outputSimilarityMatrix.getConfiguration();
        mostSimilarItemsConf.set(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR,
            new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
        mostSimilarItemsConf.setInt(ItemSimilarityJob.MAX_SIMILARITIES_PER_ITEM, maxSimilaritiesPerItem);
        return outputSimilarityMatrix.waitForCompletion(true);
    }

    /**
     * Tunes sort and timeout settings on the given job's configuration.
     *
     * <p>Sets the sort factor to 100, sets the sort buffer to half of the heap size found in the
     * configured child JVM options (capped at {@value #MAX_IO_SORT_MB} MB, assuming
     * {@value #DEFAULT_ASSUMED_HEAP_MB} MB when no {@code -Xmx} flag is found), and raises the
     * task timeout to one hour.
     *
     * @param jobContext job whose configuration is modified in place
     */
    private static void setIOSort(JobContext jobContext) {
        Configuration conf = jobContext.getConfiguration();
        conf.setInt(CommonConfigurationKeysPublic.IO_SORT_FACTOR_KEY, 100);

        // Prefer the map-specific JVM options; fall back to the general task JVM options.
        String javaOpts = conf.get("mapred.map.child.java.opts");
        if (javaOpts == null) {
            javaOpts = conf.get(JobConf.MAPRED_TASK_JAVA_OPTS);
        }

        int assumedHeapMb = DEFAULT_ASSUMED_HEAP_MB;
        if (javaOpts != null) {
            Matcher heapMatcher = MAX_HEAP_PATTERN.matcher(javaOpts);
            if (heapMatcher.find()) {
                assumedHeapMb = Integer.parseInt(heapMatcher.group(1));
                // A 'g'/'G' suffix means gigabytes; normalise to megabytes.
                if ("g".equalsIgnoreCase(heapMatcher.group(2))) {
                    assumedHeapMb *= 1024;
                }
            }
        }

        conf.setInt(CommonConfigurationKeysPublic.IO_SORT_MB_KEY, Math.min(assumedHeapMb / 2, MAX_IO_SORT_MB));
        conf.setInt("mapred.task.timeout", TASK_TIMEOUT_MILLIS);
    }

    /**
     * Command-line entry point; runs this job through {@link ToolRunner}.
     *
     * <p>NOTE(review): the exit code returned by {@code ToolRunner.run} is discarded here, so the
     * process exit status does not reflect a failed run.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) throws Exception {
        ToolRunner.run(new Configuration(), new RecommenderJob(), strArr);
    }
}
